# pip install tensorflow
# conda install pandas
# conda install seaborn
# conda install scikit-learn
# conda install plotly
# pip install --upgrade nbformat
# conda install Jinja2
# pip install keras-cv
# pip install tensorflow_datasets
| Label | Description |
|---|---|
| 0 | T-shirt/Top |
| 1 | Trouser |
| 2 | Pullover |
| 3 | Dress |
| 4 | Coat |
| 5 | Sandal |
| 6 | Shirt |
| 7 | Sneaker |
| 8 | Bag |
| 9 | Ankle Boot |
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from keras.datasets import fashion_mnist
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Conv2D,Flatten,MaxPooling2D,Dense,Flatten,Dropout,BatchNormalization,LeakyReLU
from numpy import mean
from numpy import std
from matplotlib import pyplot
from sklearn.model_selection import KFold
from keras.utils import to_categorical
from keras.models import Sequential
from keras.callbacks import EarlyStopping,ModelCheckpoint,ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator
from keras.models import load_model
from keras import regularizers
from sklearn.model_selection import train_test_split
from numpy import mean
from numpy import std
from matplotlib import pyplot
from keras.optimizers import SGD
import gc
import tensorflow as tf
from tensorflow import keras
from sklearn.metrics import precision_score
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()
X_train , X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=13, stratify=y_train)
# we have 80% train,10% test,10% validation
print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)
X_train = X_train.reshape(-1, 784)
X_test = X_test.reshape(-1, 784)
X_val = X_val.reshape(-1, 784)
(48000, 28, 28) (10000, 28, 28) (48000,) (10000,)
X_train_df = pd.DataFrame(X_train.reshape(-1, 784))
y_train_df = pd.DataFrame(y_train)
X_train_df
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 774 | 775 | 776 | 777 | 778 | 779 | 780 | 781 | 782 | 783 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | ... | 0 | 0 | 0 | 69 | 85 | 19 | 0 | 0 | 0 | 0 |
| 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 223 | 154 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 47995 | 0 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 87 | 217 | 184 | 0 | 0 | 0 | 0 |
| 47996 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | ... | 60 | 50 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
| 47997 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 47998 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 47999 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 14 | ... | 0 | 0 | 0 | 4 | 131 | 125 | 55 | 0 | 0 | 0 |
48000 rows × 784 columns
Dividing each of the pixels by 255 will normalize the pixels between 0 to 1 We normalize the pixels so that it can increase the speed of the learning process Neural Network processes inputs uses small weights values. Large inputs can disrupt or slow down learning process. It is good that we normalize the pixels.
print(X_train.min(), X_train.max())
# it seems like the data set provided by tensorflow is already between 0 to 255 . We can normalize it by dividing by 255 or so called pixel normalization
X_train = X_train / 255.0
X_test = X_test / 255.0
X_val = X_val / 255.0
0 255
number of data points for each class is exactly the same. This is so that there will not be biases for each class
types = y_train_df[0].unique()
types.sort()
print(types)
print(y_train_df[0].value_counts())
# y_ train is just an array of numbers from 0 to 9 that represent the class of the image
[0 1 2 3 4 5 6 7 8 9] 5 4800 4 4800 3 4800 7 4800 2 4800 9 4800 8 4800 0 4800 1 4800 6 4800 Name: 0, dtype: int64
class_names = ['T-shirt', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
print(class_names)
['T-shirt', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
array_of_each_class_position = [1,21,5,20,24,9,18,6,23,0]
# plot all images in array_of_each_class_position
plt.figure(figsize=(10,10))
for i in range(len(array_of_each_class_position)):
plt.subplot(5,2,i+1)
plt.xticks([])
plt.yticks([])
plt.grid(False)
plt.imshow(X_train_df.iloc[array_of_each_class_position[i]].values.reshape(28,28), cmap=plt.cm.binary)
plt.xlabel(class_names[y_train_df.iloc[array_of_each_class_position[i]][0]])
plt.show()
y_test_labels = y_test
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
y_val = to_categorical(y_val)
X_train: uint8 NumPy array of grayscale image data with shapes (, 28, 28), containing the training data.
y_train: uint8 NumPy array of labels (integers in range 0-9) with shape (60000,) for the training data.
X_test: uint8 NumPy array of grayscale image data with shapes (10000, 28, 28), containing the test data.
y_test: uint8 NumPy array of labels (integers in range 0-9) with shape (10000,) for the test data.

# Baseline simple neural network
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from sklearn.metrics import f1_score,precision_score,recall_score
# fix random seed for reproducibility
seed = 1
np.random.seed(seed)
model = Sequential()
model.add(Dense(128, input_shape=(784,), activation='relu'))
model.add(Dense(10, activation='sigmoid'))
model.summary()
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.fit(X_train, y_train, epochs=5, verbose=2)
results=model.evaluate(X_test, y_test,verbose=2)
print("test loss,", results[0], "test acc:", results[1])
# Train your model and save its history
def plot_loss(loss,test_loss):
plt.figure()
plt.plot(loss)
plt.plot(test_loss)
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper right')
plt.show()
def plot_accuracy(acc,test_acc):
plt.figure()
plt.plot(acc)
plt.plot(test_acc)
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper right')
plt.show()
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
h_callback = model.fit(X_train, y_train, epochs = 10,
validation_data=(X_val, y_val))
# Plot train vs test loss during training
plot_loss(h_callback.history['loss'], h_callback.history['val_loss'])
# Plot train vs test accuracy during training
plot_accuracy(h_callback.history['accuracy'], h_callback.history['val_accuracy'])
# Evaluate your model on the test set
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print('\nTest accuracy:', test_acc)
# function to get precision, recall and f1 score
def get_metrics(model,X_test = X_test):
predictions = np.argmax(model.predict(X_test), axis=-1)
precision = precision_score(y_test_labels, predictions, average='macro')
recall = recall_score(y_test_labels, predictions, average='macro')
f1 = f1_score(y_test_labels, predictions, average='macro')
return precision,recall,f1
precision,recall,f1 = get_metrics(model)
# Dataframe to keep track of all model scores
Model_scores = pd.DataFrame([['Dense 1 layer NN',test_acc,test_loss,precision,recall,f1]],columns=['Model','Accuracy','Loss','Precision','Recall','F1 Score'])
# memory leak
gc.collect()
tf.keras.backend.clear_session()
del model
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense (Dense) (None, 128) 100480
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 101,770
Trainable params: 101,770
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
1500/1500 - 5s - loss: 0.5122 - accuracy: 0.8189 - 5s/epoch - 3ms/step
Epoch 2/5
1500/1500 - 3s - loss: 0.3853 - accuracy: 0.8613 - 3s/epoch - 2ms/step
Epoch 3/5
1500/1500 - 3s - loss: 0.3477 - accuracy: 0.8722 - 3s/epoch - 2ms/step
Epoch 4/5
1500/1500 - 3s - loss: 0.3223 - accuracy: 0.8812 - 3s/epoch - 2ms/step
Epoch 5/5
1500/1500 - 3s - loss: 0.3061 - accuracy: 0.8880 - 3s/epoch - 2ms/step
313/313 - 1s - loss: 0.3832 - accuracy: 0.8638 - 746ms/epoch - 2ms/step
test loss, 0.3831861913204193 test acc: 0.8637999892234802
Epoch 1/10
1500/1500 [==============================] - 5s 3ms/step - loss: 0.2897 - accuracy: 0.8933 - val_loss: 0.3144 - val_accuracy: 0.8878
Epoch 2/10
1500/1500 [==============================] - 4s 3ms/step - loss: 0.2731 - accuracy: 0.8990 - val_loss: 0.3102 - val_accuracy: 0.8847
Epoch 3/10
1500/1500 [==============================] - 4s 3ms/step - loss: 0.2629 - accuracy: 0.9019 - val_loss: 0.3261 - val_accuracy: 0.8854
Epoch 4/10
1500/1500 [==============================] - 5s 3ms/step - loss: 0.2527 - accuracy: 0.9066 - val_loss: 0.3150 - val_accuracy: 0.8855
Epoch 5/10
1500/1500 [==============================] - 5s 3ms/step - loss: 0.2436 - accuracy: 0.9096 - val_loss: 0.3037 - val_accuracy: 0.8913
Epoch 6/10
1500/1500 [==============================] - 5s 3ms/step - loss: 0.2341 - accuracy: 0.9129 - val_loss: 0.3061 - val_accuracy: 0.8942
Epoch 7/10
1500/1500 [==============================] - 5s 3ms/step - loss: 0.2258 - accuracy: 0.9165 - val_loss: 0.3010 - val_accuracy: 0.8932
Epoch 8/10
1500/1500 [==============================] - 4s 3ms/step - loss: 0.2174 - accuracy: 0.9204 - val_loss: 0.3089 - val_accuracy: 0.8906
Epoch 9/10
1500/1500 [==============================] - 4s 3ms/step - loss: 0.2107 - accuracy: 0.9221 - val_loss: 0.3124 - val_accuracy: 0.8917
Epoch 10/10
1500/1500 [==============================] - 5s 3ms/step - loss: 0.2038 - accuracy: 0.9235 - val_loss: 0.3239 - val_accuracy: 0.8854
313/313 - 1s - loss: 0.3666 - accuracy: 0.8794 - 745ms/epoch - 2ms/step Test accuracy: 0.8794000148773193 313/313 [==============================] - 0s 1ms/step
Baseline model with at least 2 layers which performs slightly better than 1 layer However 2 layered model is clearly overfitted where testing learning curve diverges away from training learning curve and only performs a little better than model with 1 layer
model = Sequential()
model.add(Dense(128, input_shape=(784,), activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(10, activation='sigmoid'))
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.summary()
h_callback = model.fit(X_train, y_train,epochs = 5,
validation_data=(X_val, y_val))
# Evaluate your model on the test set
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
# Plot train vs test loss during training
plot_loss(h_callback.history['loss'], h_callback.history['val_loss'])
# Plot train vs test accuracy during training
plot_accuracy(h_callback.history['accuracy'], h_callback.history['val_accuracy'])
precision,recall,f1 = get_metrics(model)
Model_scores = pd.concat([Model_scores,pd.DataFrame([['Dense NN 2 layer',test_acc,test_loss,precision,recall,f1]],columns=Model_scores.columns)],ignore_index=True)
# memory leak
gc.collect()
tf.keras.backend.clear_session()
del model
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense (Dense) (None, 128) 100480
dense_1 (Dense) (None, 64) 8256
dense_2 (Dense) (None, 10) 650
=================================================================
Total params: 109,386
Trainable params: 109,386
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
1500/1500 [==============================] - 6s 4ms/step - loss: 0.5135 - accuracy: 0.8166 - val_loss: 0.4220 - val_accuracy: 0.8398
Epoch 2/5
1500/1500 [==============================] - 5s 4ms/step - loss: 0.3798 - accuracy: 0.8603 - val_loss: 0.3685 - val_accuracy: 0.8677
Epoch 3/5
1500/1500 [==============================] - 5s 3ms/step - loss: 0.3435 - accuracy: 0.8722 - val_loss: 0.4063 - val_accuracy: 0.8532
Epoch 4/5
1500/1500 [==============================] - 5s 3ms/step - loss: 0.3220 - accuracy: 0.8810 - val_loss: 0.3199 - val_accuracy: 0.8832
Epoch 5/5
1500/1500 [==============================] - 5s 3ms/step - loss: 0.3005 - accuracy: 0.8874 - val_loss: 0.3093 - val_accuracy: 0.8886
313/313 - 1s - loss: 0.3474 - accuracy: 0.8748 - 783ms/epoch - 3ms/step
313/313 [==============================] - 0s 1ms/step
model = Sequential()
model.add(Dense(128, input_shape=(784,), activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(10, activation='sigmoid'))
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.summary()
h_callback = model.fit(X_train, y_train,epochs = 10,
validation_data=(X_val, y_val))
# Evaluate your model on the test set
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
# Plot train vs test loss during training
plot_loss(h_callback.history['loss'], h_callback.history['val_loss'])
# Plot train vs test accuracy during training
plot_accuracy(h_callback.history['accuracy'], h_callback.history['val_accuracy'])
precision,recall,f1 = get_metrics(model)
Model_scores = pd.concat([Model_scores,pd.DataFrame([['Dense NN 3 layer',test_acc,test_loss,precision,recall,f1]],columns=Model_scores.columns)],ignore_index=True)
pred=model.predict(X_test)
pred=np.argmax(pred,axis=1)
classifation_matrix=confusion_matrix(y_test_labels,pred)
plt.figure(figsize=(10,10))
sns.heatmap(classifation_matrix,annot=True,fmt='d')
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()
# memory leak
gc.collect()
tf.keras.backend.clear_session()
del model
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense (Dense) (None, 128) 100480
dense_1 (Dense) (None, 64) 8256
dense_2 (Dense) (None, 32) 2080
dense_3 (Dense) (None, 10) 330
=================================================================
Total params: 111,146
Trainable params: 111,146
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
1500/1500 [==============================] - 6s 4ms/step - loss: 0.5380 - accuracy: 0.8088 - val_loss: 0.4011 - val_accuracy: 0.8535
Epoch 2/10
1500/1500 [==============================] - 6s 4ms/step - loss: 0.3919 - accuracy: 0.8573 - val_loss: 0.3912 - val_accuracy: 0.8588
Epoch 3/10
1500/1500 [==============================] - 6s 4ms/step - loss: 0.3534 - accuracy: 0.8696 - val_loss: 0.3977 - val_accuracy: 0.8562
Epoch 4/10
1500/1500 [==============================] - 6s 4ms/step - loss: 0.3299 - accuracy: 0.8782 - val_loss: 0.3374 - val_accuracy: 0.8773
Epoch 5/10
1500/1500 [==============================] - 6s 4ms/step - loss: 0.3111 - accuracy: 0.8855 - val_loss: 0.3703 - val_accuracy: 0.8662
Epoch 6/10
1500/1500 [==============================] - 6s 4ms/step - loss: 0.2963 - accuracy: 0.8890 - val_loss: 0.3286 - val_accuracy: 0.8822
Epoch 7/10
1500/1500 [==============================] - 6s 4ms/step - loss: 0.2809 - accuracy: 0.8946 - val_loss: 0.3130 - val_accuracy: 0.8911
Epoch 8/10
1500/1500 [==============================] - 6s 4ms/step - loss: 0.2689 - accuracy: 0.8993 - val_loss: 0.3152 - val_accuracy: 0.8866
Epoch 9/10
1500/1500 [==============================] - 6s 4ms/step - loss: 0.2604 - accuracy: 0.9019 - val_loss: 0.3097 - val_accuracy: 0.8863
Epoch 10/10
1500/1500 [==============================] - 6s 4ms/step - loss: 0.2507 - accuracy: 0.9044 - val_loss: 0.3020 - val_accuracy: 0.8903
313/313 - 1s - loss: 0.3462 - accuracy: 0.8755 - 807ms/epoch - 3ms/step
313/313 [==============================] - 0s 1ms/step 313/313 [==============================] - 0s 1ms/step
pros
cons

Saily Shah — Published On January 27, 2022 and Last Modified On March 15th, 2022 Shah,S Convolutional Neural Network: An Overview Available at: https://towardsdatascience.com/convolutional-neural-network-feature-map-and-filter-visualization-f75012a5a49c[Accessed : 4 november 2022 ]
Convolve ~ combine (one function or series) with another by forming their convolution which is also summing the element-wise product of 2 matrices (Shah,S 2022)

Striding convolutions : repeating what happen above but with strides; the filter moving through the image and getting a matrix of convolved features. The main purpose of doing this is to find features of a class which will then be used for classification of the images
- final matrix(convolved matrix) produced is smaller after retrieving main features of an image
Convolving helps neural network extract features performing better than MLP.
# Functions for plotting
from plotly.subplots import make_subplots
from plotly import tools
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
def create_trace(x,y,ylabel,color):
trace = go.Scatter(
x = x,y = y,
name=ylabel,
marker=dict(color=color),
mode = "markers+lines",
text=x
)
return trace
def plot_accuracy_and_loss(train_model):
hist = train_model.history
acc = hist['accuracy']
val_acc = hist['val_accuracy']
loss = hist['loss']
val_loss = hist['val_loss']
epochs = list(range(1,len(acc)+1))
trace_ta = create_trace(epochs,acc,"Training accuracy", "Green")
trace_va = create_trace(epochs,val_acc,"Validation accuracy", "Red")
trace_tl = create_trace(epochs,loss,"Training loss", "Blue")
trace_vl = create_trace(epochs,val_loss,"Validation loss", "Magenta")
fig = make_subplots(rows=1,cols=2, subplot_titles=('Training and validation accuracy',
'Training and validation loss'))
fig.append_trace(trace_ta,1,1)
fig.append_trace(trace_va,1,1)
fig.append_trace(trace_tl,1,2)
fig.append_trace(trace_vl,1,2)
fig['layout']['xaxis'].update(title = 'Epoch')
fig['layout']['xaxis2'].update(title = 'Epoch')
fig['layout']['yaxis'].update(title = 'Accuracy', range=[0,1])
fig['layout']['yaxis2'].update(title = 'Loss', range=[0,1])
iplot(fig, filename=f'accuracy-loss_{train_model}')
from tensorflow.keras.layers import Conv2D,Flatten
from numpy import mean
from numpy import std
from matplotlib import pyplot
from sklearn.model_selection import KFold
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.optimizers import SGD
from keras.layers import Dropout
from keras.layers import BatchNormalization
from keras.layers import Activation
from keras.layers import LeakyReLU
epochs = 15
num_classes = 10
seed = np.random.seed(1)
print('before',X_train.shape,X_test.shape)
print('before',y_train.shape,y_test.shape)
X_train = X_train.reshape((X_train.shape[0], 28, 28, 1))
X_test = X_test.reshape((X_test.shape[0], 28, 28, 1))
X_val = X_val.reshape((X_val.shape[0], 28, 28, 1))
print('after',X_train.shape,X_test.shape)
fashion_model = Sequential()
fashion_model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1)))
fashion_model.add(Conv2D(64, (3, 3), activation='linear'))
fashion_model.add(Flatten())
fashion_model.add(Dense(128, activation='linear'))
fashion_model.add(Dense(num_classes, activation='softmax'))
fashion_model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model.summary()
h_callback = fashion_model.fit(X_train, y_train, epochs = 10,
validation_data=(X_val, y_val))
# Evaluate your model on the test set
test_loss, test_acc = fashion_model.evaluate(X_test, y_test, verbose=2)
# Plot train vs test loss during training
plot_accuracy_and_loss(h_callback)
# function to get precision, recall and f1 score
def get_metrics(model,X_test):
predictions = np.argmax(model.predict(X_test), axis=-1)
precision = precision_score(y_test_labels, predictions, average='macro')
recall = recall_score(y_test_labels, predictions, average='macro')
f1 = f1_score(y_test_labels, predictions, average='macro')
return precision,recall,f1
# Dataframe to keep track of all model scores
precision,recall,f1 = get_metrics(fashion_model,X_test)
Model_scores = pd.concat([Model_scores,pd.DataFrame([['CNN linear activation',test_acc,test_loss,precision,recall,f1]],columns=Model_scores.columns)],ignore_index=True)
before (48000, 784) (10000, 784)
before (48000, 10) (10000, 10)
after (48000, 28, 28, 1) (10000, 28, 28, 1)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 26, 26, 32) 320
conv2d_1 (Conv2D) (None, 24, 24, 64) 18496
flatten (Flatten) (None, 36864) 0
dense (Dense) (None, 128) 4718720
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 4,738,826
Trainable params: 4,738,826
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
1500/1500 [==============================] - 11s 5ms/step - loss: 0.5581 - accuracy: 0.8115 - val_loss: 0.4680 - val_accuracy: 0.8393
Epoch 2/10
1500/1500 [==============================] - 8s 5ms/step - loss: 0.4641 - accuracy: 0.8382 - val_loss: 0.4647 - val_accuracy: 0.8410
Epoch 3/10
1500/1500 [==============================] - 8s 5ms/step - loss: 0.4495 - accuracy: 0.8430 - val_loss: 0.4689 - val_accuracy: 0.8392
Epoch 4/10
1500/1500 [==============================] - 8s 5ms/step - loss: 0.4409 - accuracy: 0.8446 - val_loss: 0.4834 - val_accuracy: 0.8388
Epoch 5/10
1500/1500 [==============================] - 8s 5ms/step - loss: 0.4307 - accuracy: 0.8486 - val_loss: 0.4672 - val_accuracy: 0.8413
Epoch 6/10
1500/1500 [==============================] - 8s 5ms/step - loss: 0.4243 - accuracy: 0.8504 - val_loss: 0.4601 - val_accuracy: 0.8421
Epoch 7/10
1500/1500 [==============================] - 8s 5ms/step - loss: 0.4179 - accuracy: 0.8525 - val_loss: 0.4567 - val_accuracy: 0.8479
Epoch 8/10
1500/1500 [==============================] - 8s 5ms/step - loss: 0.4136 - accuracy: 0.8534 - val_loss: 0.4624 - val_accuracy: 0.8457
Epoch 9/10
1500/1500 [==============================] - 8s 5ms/step - loss: 0.4106 - accuracy: 0.8563 - val_loss: 0.4651 - val_accuracy: 0.8428
Epoch 10/10
1500/1500 [==============================] - 8s 5ms/step - loss: 0.4049 - accuracy: 0.8557 - val_loss: 0.4748 - val_accuracy: 0.8313
313/313 - 1s - loss: 0.4994 - accuracy: 0.8195 - 910ms/epoch - 3ms/step
313/313 [==============================] - 1s 2ms/step
#Understand the filters in the model
#Let us pick the first hidden layer as the layer of interest.
layer = fashion_model.layers #Conv layers at 0,
filters, biases = fashion_model.layers[0].get_weights()
print(layer[0].name, filters.shape)
# plot filters
fig1=plt.figure(figsize=(8, 12))
columns = 8
rows = 8
n_filters = 32 ## the number of filters in our first layer
for i in range(1, n_filters + 1):
f = filters[:, :, :, i-1]
fig1 =plt.subplot(rows, columns, i)
fig1.set_xticks([]) #Turn off axis
fig1.set_yticks([])
plt.imshow(f[:, :, 0], cmap='gray') #Show only the filters from 0th channel (R)
#ix += 1
plt.show()
pred=fashion_model.predict(X_test)
pred=np.argmax(pred,axis=1)
classifation_matrix=confusion_matrix(y_test_labels,pred)
plt.figure(figsize=(10,10))
sns.heatmap(classifation_matrix,annot=True,fmt='d')
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()
# memory leak
gc.collect()
tf.keras.backend.clear_session()
del fashion_model
conv2d (3, 3, 1, 32)
313/313 [==============================] - 1s 2ms/step
Machines understand binary and it makes more sense to one_hot_encode to change it to an array of 0 and 1s instead of a range of 0 to 9
Its amazing how the filter is extracting key information on the various classes, showing the lines edge and features of the images it is looking out for
# baseline cnn model for fashion mnist
from numpy import mean
from numpy import std
from matplotlib import pyplot
from sklearn.model_selection import KFold
from keras.datasets import fashion_mnist
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.optimizers import SGD
from keras.callbacks import EarlyStopping
# evaluate a model using k-fold cross-validation
def evaluate_model(model,dataX, dataY, n_folds=5,valX=X_val,valy=y_val):
scores, histories = list(), list()
# prepare cross validation
kfold = KFold(n_folds, shuffle=True, random_state=1)
# enumerate splits
for train_ix, test_ix in kfold.split(dataX):
# select rows for train and test
trainX, trainY, testX, testY = dataX[train_ix], dataY[train_ix], dataX[test_ix], dataY[test_ix]
h_callback = EarlyStopping(monitor='val_loss', patience=5)
# fit model
history = model.fit(trainX, trainY, epochs=50, validation_data=(testX, testY), verbose=0,callbacks=[h_callback])
# evaluate model
_, acc = model.evaluate(valX, valy, verbose=0)
print('> %.3f' % (acc * 100.0))
# append scores
scores.append(acc)
histories.append(history)
return scores, histories
# plot diagnostic learning curves
def summarize_diagnostics(histories):
for i in range(len(histories)):
# plot loss
pyplot.subplot(211)
pyplot.title('Cross Entropy Loss')
pyplot.plot(histories[i].history['loss'], color='blue', label='train')
pyplot.plot(histories[i].history['val_loss'], color='orange', label='test')
# plot accuracy
pyplot.subplot(212)
pyplot.title('Classification Accuracy')
pyplot.plot(histories[i].history['accuracy'], color='blue', label='train')
pyplot.plot(histories[i].history['val_accuracy'], color='orange', label='test')
pyplot.legend()
pyplot.show()
# summarize model performance
def summarize_performance(scores):
# print summary
print('Accuracy: mean=%.3f std=%.3f, n=%d' % (mean(scores)*100, std(scores)*100, len(scores)))
# box and whisker plots of results
pyplot.boxplot(scores)
pyplot.show()
# run the test harness for evaluating a model
def run_test_harness(model,X_train, y_train):
# evaluate model
scores, histories= evaluate_model(model,X_train, y_train)
# learning curves
summarize_diagnostics(histories)
# summarize estimated performance
summarize_performance(scores)
del model
gc.collect()
tf.keras.backend.clear_session()

Softmax produces a probability score for all 10 classes . If the model is not too confident about the choice it makes , it will be summed up and added to logloss. The more uncertain the model is the higher the logloss

My hypothesis is that max pooling will be better for this dataset as it completely wipes out noise in a region by taking brightest
def model_max_pooling():
# define model
max_pool_model = Sequential()
max_pool_model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1)))
max_pool_model.add(MaxPooling2D((2, 2)))
max_pool_model.add(Flatten())
max_pool_model.add(Dense(128, activation='linear'))
max_pool_model.add(Dense(num_classes, activation='softmax'))
# compile model
max_pool_model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
return max_pool_model
run_test_harness(model_max_pooling(),X_train, y_train)
model_max_pooling = model_max_pooling()
model_max_pooling.fit(X_train, y_train, epochs=50, validation_data=(X_val, y_val), verbose=0)
# Evaluate your model on the test set
test_loss, test_acc = model_max_pooling.evaluate(X_test, y_test, verbose=2)
precision,recall,f1 = get_metrics(model_max_pooling,X_test)
Model_scores = pd.concat([Model_scores,pd.DataFrame([['CNN linear max pool',test_acc,test_loss,precision,recall,f1]],columns=Model_scores.columns)],ignore_index=True)
> 88.725 > 88.692 > 88.525 > 88.617 > 88.492
Accuracy: mean=88.610 std=0.091, n=5
313/313 - 1s - loss: 0.7008 - accuracy: 0.8778 - 719ms/epoch - 2ms/step 313/313 [==============================] - 0s 1ms/step
from keras.layers import AveragePooling2D
def model_average_pooling():
# define model
average_pool_model = Sequential()
average_pool_model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1)))
average_pool_model.add(AveragePooling2D((2, 2)))
average_pool_model.add(Flatten())
average_pool_model.add(Dense(128, activation='linear'))
average_pool_model.add(Dense(num_classes, activation='softmax'))
# compile model
average_pool_model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
return average_pool_model
run_test_harness(model_average_pooling(),X_train, y_train)
model_average_pooling = model_average_pooling()
model_average_pooling.fit(X_train, y_train, epochs=50, validation_data=(X_val, y_val), verbose=0)
# Evaluate your model on the test set
test_loss, test_acc = model_average_pooling.evaluate(X_test, y_test, verbose=2)
precision,recall,f1 = get_metrics(model_average_pooling,X_test)
Model_scores = pd.concat([Model_scores,pd.DataFrame([['CNN linear avg pool',test_acc,test_loss,precision,recall,f1]],columns=Model_scores.columns)],ignore_index=True)
> 84.492 > 84.833 > 83.883 > 84.542 > 84.608
Accuracy: mean=84.472 std=0.317, n=5
313/313 - 1s - loss: 0.4945 - accuracy: 0.8325 - 806ms/epoch - 3ms/step 313/313 [==============================] - 0s 1ms/step
%%time
fashion_model_batch = Sequential()
fashion_model_batch.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1)))
fashion_model_batch.add(Conv2D(64, (3, 3), activation='linear'))
fashion_model_batch.add(Flatten())
fashion_model_batch.add(Dense(128, activation='linear'))
fashion_model_batch.add(Dense(num_classes, activation='softmax'))
fashion_model_batch.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model_batch.summary()
h_callback = fashion_model_batch.fit(X_train, y_train, epochs = 10, batch_size=32,validation_data=(X_test, y_test))
# Plot train vs test loss during training
plot_accuracy_and_loss(h_callback)
Model: "sequential_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_1 (Conv2D) (None, 26, 26, 32) 320
conv2d_2 (Conv2D) (None, 24, 24, 64) 18496
flatten_1 (Flatten) (None, 36864) 0
dense_2 (Dense) (None, 128) 4718720
dense_3 (Dense) (None, 10) 1290
=================================================================
Total params: 4,738,826
Trainable params: 4,738,826
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
1500/1500 [==============================] - 8s 5ms/step - loss: 0.5740 - accuracy: 0.8109 - val_loss: 0.4825 - val_accuracy: 0.8331
Epoch 2/10
1500/1500 [==============================] - 8s 5ms/step - loss: 0.4632 - accuracy: 0.8369 - val_loss: 0.5328 - val_accuracy: 0.8119
Epoch 3/10
1500/1500 [==============================] - 8s 5ms/step - loss: 0.4477 - accuracy: 0.8437 - val_loss: 0.5017 - val_accuracy: 0.8260
Epoch 4/10
1500/1500 [==============================] - 8s 5ms/step - loss: 0.4395 - accuracy: 0.8457 - val_loss: 0.4855 - val_accuracy: 0.8285
Epoch 5/10
1500/1500 [==============================] - 8s 5ms/step - loss: 0.4349 - accuracy: 0.8470 - val_loss: 0.4807 - val_accuracy: 0.8313
Epoch 6/10
1500/1500 [==============================] - 8s 5ms/step - loss: 0.4247 - accuracy: 0.8504 - val_loss: 0.4959 - val_accuracy: 0.8248
Epoch 7/10
1500/1500 [==============================] - 8s 5ms/step - loss: 0.4201 - accuracy: 0.8528 - val_loss: 0.5079 - val_accuracy: 0.8260
Epoch 8/10
1500/1500 [==============================] - 8s 5ms/step - loss: 0.4147 - accuracy: 0.8533 - val_loss: 0.4759 - val_accuracy: 0.8346
Epoch 9/10
1500/1500 [==============================] - 8s 5ms/step - loss: 0.4107 - accuracy: 0.8532 - val_loss: 0.4786 - val_accuracy: 0.8328
Epoch 10/10
1500/1500 [==============================] - 8s 5ms/step - loss: 0.4038 - accuracy: 0.8572 - val_loss: 0.4921 - val_accuracy: 0.8311
CPU times: total: 1min 42s Wall time: 1min 17s
%%time
fashion_model_batch = Sequential()
fashion_model_batch.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1)))
fashion_model_batch.add(Conv2D(64, (3, 3), activation='linear'))
fashion_model_batch.add(Flatten())
fashion_model_batch.add(Dense(128, activation='linear'))
fashion_model_batch.add(Dense(num_classes, activation='softmax'))
fashion_model_batch.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model_batch.summary()
h_callback = fashion_model_batch.fit(X_train, y_train, epochs = 10, batch_size=64,validation_data=(X_test, y_test))
# Plot train vs test loss during training
plot_accuracy_and_loss(h_callback)
Model: "sequential_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_3 (Conv2D) (None, 26, 26, 32) 320
conv2d_4 (Conv2D) (None, 24, 24, 64) 18496
flatten_2 (Flatten) (None, 36864) 0
dense_4 (Dense) (None, 128) 4718720
dense_5 (Dense) (None, 10) 1290
=================================================================
Total params: 4,738,826
Trainable params: 4,738,826
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
750/750 [==============================] - 5s 6ms/step - loss: 0.5536 - accuracy: 0.8115 - val_loss: 0.5026 - val_accuracy: 0.8260
Epoch 2/10
750/750 [==============================] - 5s 6ms/step - loss: 0.4510 - accuracy: 0.8423 - val_loss: 0.4881 - val_accuracy: 0.8268
Epoch 3/10
750/750 [==============================] - 5s 6ms/step - loss: 0.4357 - accuracy: 0.8474 - val_loss: 0.4781 - val_accuracy: 0.8322
Epoch 4/10
750/750 [==============================] - 5s 6ms/step - loss: 0.4249 - accuracy: 0.8494 - val_loss: 0.4730 - val_accuracy: 0.8351
Epoch 5/10
750/750 [==============================] - 5s 6ms/step - loss: 0.4166 - accuracy: 0.8531 - val_loss: 0.4840 - val_accuracy: 0.8314
Epoch 6/10
750/750 [==============================] - 5s 6ms/step - loss: 0.4155 - accuracy: 0.8535 - val_loss: 0.4835 - val_accuracy: 0.8334
Epoch 7/10
750/750 [==============================] - 5s 6ms/step - loss: 0.4113 - accuracy: 0.8559 - val_loss: 0.4941 - val_accuracy: 0.8261
Epoch 8/10
750/750 [==============================] - 5s 6ms/step - loss: 0.4060 - accuracy: 0.8552 - val_loss: 0.5115 - val_accuracy: 0.8216
Epoch 9/10
750/750 [==============================] - 5s 6ms/step - loss: 0.4047 - accuracy: 0.8565 - val_loss: 0.5058 - val_accuracy: 0.8200
Epoch 10/10
750/750 [==============================] - 5s 6ms/step - loss: 0.3980 - accuracy: 0.8583 - val_loss: 0.4929 - val_accuracy: 0.8236
CPU times: total: 1min 3s Wall time: 47 s
%%time
fashion_model_batch = Sequential()
fashion_model_batch.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1)))
fashion_model_batch.add(Conv2D(64, (3, 3), activation='linear'))
fashion_model_batch.add(Flatten())
fashion_model_batch.add(Dense(128, activation='linear'))
fashion_model_batch.add(Dense(num_classes, activation='softmax'))
fashion_model_batch.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model_batch.summary()
h_callback = fashion_model_batch.fit(X_train, y_train, epochs = 10, batch_size=128,validation_data=(X_test, y_test))
# Plot train vs test loss during training
plot_accuracy_and_loss(h_callback)
Model: "sequential_3"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_5 (Conv2D) (None, 26, 26, 32) 320
conv2d_6 (Conv2D) (None, 24, 24, 64) 18496
flatten_3 (Flatten) (None, 36864) 0
dense_6 (Dense) (None, 128) 4718720
dense_7 (Dense) (None, 10) 1290
=================================================================
Total params: 4,738,826
Trainable params: 4,738,826
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
375/375 [==============================] - 3s 8ms/step - loss: 0.5419 - accuracy: 0.8165 - val_loss: 0.5008 - val_accuracy: 0.8217
Epoch 2/10
375/375 [==============================] - 3s 8ms/step - loss: 0.4444 - accuracy: 0.8451 - val_loss: 0.4920 - val_accuracy: 0.8293
Epoch 3/10
375/375 [==============================] - 3s 8ms/step - loss: 0.4254 - accuracy: 0.8519 - val_loss: 0.4762 - val_accuracy: 0.8323
Epoch 4/10
375/375 [==============================] - 3s 8ms/step - loss: 0.4166 - accuracy: 0.8535 - val_loss: 0.4813 - val_accuracy: 0.8329
Epoch 5/10
375/375 [==============================] - 3s 8ms/step - loss: 0.4095 - accuracy: 0.8550 - val_loss: 0.4663 - val_accuracy: 0.8375
Epoch 6/10
375/375 [==============================] - 3s 8ms/step - loss: 0.4016 - accuracy: 0.8580 - val_loss: 0.4831 - val_accuracy: 0.8243
Epoch 7/10
375/375 [==============================] - 3s 8ms/step - loss: 0.3981 - accuracy: 0.8582 - val_loss: 0.4743 - val_accuracy: 0.8346
Epoch 8/10
375/375 [==============================] - 3s 8ms/step - loss: 0.3983 - accuracy: 0.8577 - val_loss: 0.4871 - val_accuracy: 0.8327
Epoch 9/10
375/375 [==============================] - 3s 8ms/step - loss: 0.3918 - accuracy: 0.8609 - val_loss: 0.4826 - val_accuracy: 0.8301
Epoch 10/10
375/375 [==============================] - 3s 8ms/step - loss: 0.3912 - accuracy: 0.8607 - val_loss: 0.4853 - val_accuracy: 0.8253
CPU times: total: 39.8 s Wall time: 29.6 s
The batch size is a hyperparameter that defines the number of samples to work through before updating the internal model parameters. Instead of having to update only after the whole dataset is trained once. It is indeed better to update after training each batch.
smaller batch_size is will take more time though¶
# evaluate a model using k-fold cross-validation
def evaluate_model(model,dataX, dataY, n_folds=5, valX=X_val, valY=y_val):
scores, histories = list(), list()
# prepare cross validation
kfold = KFold(n_folds, shuffle=True, random_state=1)
# enumerate splits
for train_ix, test_ix in kfold.split(dataX):
# select rows for train and test
trainX, trainY, testX, testY = dataX[train_ix], dataY[train_ix], dataX[test_ix], dataY[test_ix]
# fit model
history = model.fit(trainX, trainY, epochs=100, batch_size=128,validation_data=(testX, testY), verbose=0, callbacks=[EarlyStopping(monitor='val_loss', patience=10)])
# evaluate model
_, acc = model.evaluate(valX, valY, verbose=0)
print('> %.3f' % (acc * 100.0))
# append scores
scores.append(acc)
histories.append(history)
return scores, histories
%%time
from keras.layers import BatchNormalization
fashion_model_batchnorm = Sequential()
fashion_model_batchnorm.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1)))
fashion_model_batchnorm.add(Conv2D(64, (3, 3), activation='linear'))
fashion_model_batchnorm.add(BatchNormalization())
fashion_model_batchnorm.add(Flatten())
fashion_model_batchnorm.add(Dense(128, activation='linear'))
fashion_model_batchnorm.add(Dense(num_classes, activation='softmax'))
fashion_model_batchnorm.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model_batchnorm.summary()
h_callback = fashion_model_batchnorm.fit(X_train, y_train, epochs = 10, batch_size=64,validation_data=(X_test, y_test))
# Plot train vs test loss during training
plot_accuracy_and_loss(h_callback)
test_loss, test_acc = fashion_model_batchnorm.evaluate(X_test, y_test, verbose=2)
precision,recall,f1 = get_metrics(fashion_model_batchnorm,X_test)
Model_scores = pd.concat([Model_scores,pd.DataFrame([['CNN linear w batch_norm',test_acc,test_loss,precision,recall,f1]],columns=Model_scores.columns)],ignore_index=True)
gc.collect()
tf.keras.backend.clear_session()
del fashion_model_batchnorm
Model: "sequential_4"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_7 (Conv2D) (None, 26, 26, 32) 320
conv2d_8 (Conv2D) (None, 24, 24, 64) 18496
batch_normalization (BatchN (None, 24, 24, 64) 256
ormalization)
flatten_4 (Flatten) (None, 36864) 0
dense_8 (Dense) (None, 128) 4718720
dense_9 (Dense) (None, 10) 1290
=================================================================
Total params: 4,739,082
Trainable params: 4,738,954
Non-trainable params: 128
_________________________________________________________________
Epoch 1/10
750/750 [==============================] - 6s 8ms/step - loss: 1.9582 - accuracy: 0.7830 - val_loss: 0.7046 - val_accuracy: 0.7371
Epoch 2/10
750/750 [==============================] - 5s 7ms/step - loss: 0.5037 - accuracy: 0.8297 - val_loss: 0.6205 - val_accuracy: 0.8026
Epoch 3/10
750/750 [==============================] - 5s 7ms/step - loss: 0.4678 - accuracy: 0.8383 - val_loss: 0.7073 - val_accuracy: 0.7762
Epoch 4/10
750/750 [==============================] - 5s 7ms/step - loss: 0.4517 - accuracy: 0.8418 - val_loss: 0.5403 - val_accuracy: 0.8183
Epoch 5/10
750/750 [==============================] - 5s 7ms/step - loss: 0.4466 - accuracy: 0.8450 - val_loss: 0.6507 - val_accuracy: 0.7800
Epoch 6/10
750/750 [==============================] - 5s 6ms/step - loss: 0.4504 - accuracy: 0.8430 - val_loss: 0.5470 - val_accuracy: 0.8169
Epoch 7/10
750/750 [==============================] - 5s 6ms/step - loss: 0.4408 - accuracy: 0.8458 - val_loss: 0.6144 - val_accuracy: 0.7737
Epoch 8/10
750/750 [==============================] - 5s 6ms/step - loss: 0.4366 - accuracy: 0.8465 - val_loss: 0.5811 - val_accuracy: 0.8057
Epoch 9/10
750/750 [==============================] - 5s 7ms/step - loss: 0.4315 - accuracy: 0.8492 - val_loss: 0.5063 - val_accuracy: 0.8239
Epoch 10/10
750/750 [==============================] - 5s 7ms/step - loss: 0.4222 - accuracy: 0.8496 - val_loss: 0.5164 - val_accuracy: 0.8208
313/313 - 1s - loss: 0.5164 - accuracy: 0.8210 - 868ms/epoch - 3ms/step 313/313 [==============================] - 1s 2ms/step CPU times: total: 1min 12s Wall time: 54.5 s
Since it regularized on an underfitted model I did not expect it to perform as well as the model without batch normalization but it proves that it can help with overfitting and speedier training
%%time
## Importing data and augmenting it
import keras
from keras.utils import to_categorical
from keras.datasets import fashion_mnist
from tensorflow.keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()
X_train , X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=13, stratify=y_train)
print(X_train.shape, X_val.shape, X_test.shape)
X_train = X_train / 255.0
X_test = X_test / 255.0
X_val = X_val / 255.0
print('before',X_train.shape,X_test.shape)
print('before',y_train.shape,y_test.shape)
X_train = X_train.reshape((X_train.shape[0], 28, 28, 1))
X_test = X_test.reshape((X_test.shape[0], 28, 28, 1))
X_val = X_val.reshape((X_val.shape[0], 28, 28, 1))
y_test_label = y_test
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
y_val = to_categorical(y_val)
# create a grid of 3x3 images
fig, ax = plt.subplots(3, 3, sharex=True, sharey=True, figsize=(4,4))
for i in range(3):
for j in range(3):
ax[i][j].imshow(X_train[i*3+j], cmap=plt.get_cmap("gray"))
# show the plot
plt.show()
from keras.layers import BatchNormalization
fashion_model_batch = Sequential()
fashion_model_batch.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1)))
fashion_model_batch.add(Conv2D(64, (3, 3), activation='linear'))
fashion_model_batch.add(BatchNormalization())
fashion_model_batch.add(Flatten())
fashion_model_batch.add(Dense(128, activation='linear'))
fashion_model_batch.add(Dense(10, activation='softmax'))
fashion_model_batch.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model_batch.summary()
early_stopping = EarlyStopping(monitor='val_loss', patience=20, verbose=1)
h_callback = fashion_model_batch.fit(X_train, y_train, epochs = 100, batch_size=128,validation_data=(X_val, y_val),callbacks=[early_stopping])
# Plot train vs test loss during training
plot_accuracy_and_loss(h_callback)
# evaluate on test set
score = fashion_model_batch.evaluate(X_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
precision,recall,f1 = get_metrics(fashion_model_batch,X_test)
Model_scores = pd.concat([Model_scores,pd.DataFrame([['linear batchnorm high epochs w rotated data aug',score[1],score[0],precision,recall,f1]],columns=Model_scores.columns)],ignore_index=True)
del fashion_model_batch
gc.collect()
tf.keras.backend.clear_session()
(48000, 28, 28) (12000, 28, 28) (10000, 28, 28) before (48000, 28, 28) (10000, 28, 28) before (48000,) (10000,)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 26, 26, 32) 320
conv2d_1 (Conv2D) (None, 24, 24, 64) 18496
batch_normalization (BatchN (None, 24, 24, 64) 256
ormalization)
flatten (Flatten) (None, 36864) 0
dense (Dense) (None, 128) 4718720
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 4,739,082
Trainable params: 4,738,954
Non-trainable params: 128
_________________________________________________________________
Epoch 1/100
375/375 [==============================] - 4s 10ms/step - loss: 2.2827 - accuracy: 0.7719 - val_loss: 0.9760 - val_accuracy: 0.7028
Epoch 2/100
375/375 [==============================] - 3s 9ms/step - loss: 0.5001 - accuracy: 0.8314 - val_loss: 0.6542 - val_accuracy: 0.7922
Epoch 3/100
375/375 [==============================] - 3s 9ms/step - loss: 0.4660 - accuracy: 0.8385 - val_loss: 0.9012 - val_accuracy: 0.7490
Epoch 4/100
375/375 [==============================] - 3s 9ms/step - loss: 0.4664 - accuracy: 0.8383 - val_loss: 0.5980 - val_accuracy: 0.8048
Epoch 5/100
375/375 [==============================] - 3s 9ms/step - loss: 0.4481 - accuracy: 0.8444 - val_loss: 1.0325 - val_accuracy: 0.6701
Epoch 6/100
375/375 [==============================] - 3s 9ms/step - loss: 0.4367 - accuracy: 0.8483 - val_loss: 0.5330 - val_accuracy: 0.8251
Epoch 7/100
375/375 [==============================] - 3s 9ms/step - loss: 0.4273 - accuracy: 0.8511 - val_loss: 0.5552 - val_accuracy: 0.8106
Epoch 8/100
375/375 [==============================] - 3s 9ms/step - loss: 0.4198 - accuracy: 0.8521 - val_loss: 0.8703 - val_accuracy: 0.7282
Epoch 9/100
375/375 [==============================] - 3s 9ms/step - loss: 0.4164 - accuracy: 0.8533 - val_loss: 0.4857 - val_accuracy: 0.8338
Epoch 10/100
375/375 [==============================] - 3s 9ms/step - loss: 0.4148 - accuracy: 0.8536 - val_loss: 0.5175 - val_accuracy: 0.8252
Epoch 11/100
375/375 [==============================] - 3s 9ms/step - loss: 0.4130 - accuracy: 0.8534 - val_loss: 0.5508 - val_accuracy: 0.8211
Epoch 12/100
375/375 [==============================] - 3s 9ms/step - loss: 0.4156 - accuracy: 0.8527 - val_loss: 0.4882 - val_accuracy: 0.8354
Epoch 13/100
375/375 [==============================] - 3s 9ms/step - loss: 0.4106 - accuracy: 0.8544 - val_loss: 0.4895 - val_accuracy: 0.8293
Epoch 14/100
375/375 [==============================] - 3s 9ms/step - loss: 0.4068 - accuracy: 0.8552 - val_loss: 0.6032 - val_accuracy: 0.8056
Epoch 15/100
375/375 [==============================] - 3s 9ms/step - loss: 0.4098 - accuracy: 0.8559 - val_loss: 0.4715 - val_accuracy: 0.8428
Epoch 16/100
375/375 [==============================] - 3s 9ms/step - loss: 0.4046 - accuracy: 0.8560 - val_loss: 0.4762 - val_accuracy: 0.8317
Epoch 17/100
375/375 [==============================] - 3s 9ms/step - loss: 0.4001 - accuracy: 0.8578 - val_loss: 0.6921 - val_accuracy: 0.7713
Epoch 18/100
375/375 [==============================] - 3s 9ms/step - loss: 0.4018 - accuracy: 0.8573 - val_loss: 0.9302 - val_accuracy: 0.7113
Epoch 19/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3962 - accuracy: 0.8578 - val_loss: 0.6710 - val_accuracy: 0.7816
Epoch 20/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3944 - accuracy: 0.8595 - val_loss: 0.4930 - val_accuracy: 0.8366
Epoch 21/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3918 - accuracy: 0.8616 - val_loss: 0.4737 - val_accuracy: 0.8372
Epoch 22/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3881 - accuracy: 0.8622 - val_loss: 0.5034 - val_accuracy: 0.8287
Epoch 23/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3884 - accuracy: 0.8621 - val_loss: 0.5997 - val_accuracy: 0.7892
Epoch 24/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3851 - accuracy: 0.8624 - val_loss: 0.4783 - val_accuracy: 0.8293
Epoch 25/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3822 - accuracy: 0.8645 - val_loss: 0.4505 - val_accuracy: 0.8468
Epoch 26/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3806 - accuracy: 0.8645 - val_loss: 0.4538 - val_accuracy: 0.8487
Epoch 27/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3794 - accuracy: 0.8651 - val_loss: 0.4722 - val_accuracy: 0.8374
Epoch 28/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3787 - accuracy: 0.8663 - val_loss: 0.4543 - val_accuracy: 0.8472
Epoch 29/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3765 - accuracy: 0.8650 - val_loss: 0.4668 - val_accuracy: 0.8373
Epoch 30/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3739 - accuracy: 0.8665 - val_loss: 0.4737 - val_accuracy: 0.8426
Epoch 31/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3734 - accuracy: 0.8677 - val_loss: 0.4498 - val_accuracy: 0.8462
Epoch 32/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3723 - accuracy: 0.8669 - val_loss: 0.4579 - val_accuracy: 0.8472
Epoch 33/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3717 - accuracy: 0.8665 - val_loss: 0.4696 - val_accuracy: 0.8443
Epoch 34/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3695 - accuracy: 0.8693 - val_loss: 0.4573 - val_accuracy: 0.8457
Epoch 35/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3686 - accuracy: 0.8680 - val_loss: 0.4582 - val_accuracy: 0.8463
Epoch 36/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3667 - accuracy: 0.8702 - val_loss: 0.4628 - val_accuracy: 0.8470
Epoch 37/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3666 - accuracy: 0.8683 - val_loss: 0.4606 - val_accuracy: 0.8425
Epoch 38/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3660 - accuracy: 0.8695 - val_loss: 0.4918 - val_accuracy: 0.8311
Epoch 39/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3635 - accuracy: 0.8703 - val_loss: 0.4624 - val_accuracy: 0.8468
Epoch 40/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3638 - accuracy: 0.8702 - val_loss: 0.4676 - val_accuracy: 0.8450
Epoch 41/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3631 - accuracy: 0.8697 - val_loss: 0.4610 - val_accuracy: 0.8457
Epoch 42/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3632 - accuracy: 0.8692 - val_loss: 0.4700 - val_accuracy: 0.8448
Epoch 43/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3613 - accuracy: 0.8706 - val_loss: 0.5091 - val_accuracy: 0.8331
Epoch 44/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3601 - accuracy: 0.8709 - val_loss: 0.4588 - val_accuracy: 0.8503
Epoch 45/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3592 - accuracy: 0.8709 - val_loss: 0.4644 - val_accuracy: 0.8461
Epoch 46/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3586 - accuracy: 0.8730 - val_loss: 0.4766 - val_accuracy: 0.8477
Epoch 47/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3588 - accuracy: 0.8714 - val_loss: 0.4807 - val_accuracy: 0.8367
Epoch 48/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3565 - accuracy: 0.8719 - val_loss: 0.4641 - val_accuracy: 0.8480
Epoch 49/100
375/375 [==============================] - 3s 9ms/step - loss: 0.3587 - accuracy: 0.8720 - val_loss: 0.4623 - val_accuracy: 0.8475
Epoch 50/100
375/375 [==============================] - 3s 8ms/step - loss: 0.3574 - accuracy: 0.8714 - val_loss: 0.4859 - val_accuracy: 0.8392
Epoch 51/100
375/375 [==============================] - 4s 9ms/step - loss: 0.3588 - accuracy: 0.8705 - val_loss: 0.4691 - val_accuracy: 0.8415
Epoch 51: early stopping
Test loss: 0.49178609251976013 Test accuracy: 0.8300999999046326 313/313 [==============================] - 1s 2ms/step CPU times: total: 3min 54s Wall time: 2min 54s
Model_scores
| Model | Accuracy | Loss | Precision | Recall | F1 Score | |
|---|---|---|---|---|---|---|
| 0 | Dense 1 layer NN | 0.8794 | 0.366588 | 0.883387 | 0.8794 | 0.879616 |
| 1 | Dense NN 2 layer | 0.8748 | 0.347432 | 0.874826 | 0.8748 | 0.873875 |
| 2 | Dense NN 3 layer | 0.8755 | 0.346159 | 0.875871 | 0.8755 | 0.874770 |
| 3 | CNN linear activation | 0.8195 | 0.499430 | 0.829953 | 0.8195 | 0.821822 |
| 4 | CNN linear max pool | 0.8778 | 0.700755 | 0.879536 | 0.8778 | 0.878137 |
| 5 | CNN linear avg pool | 0.8325 | 0.494453 | 0.831806 | 0.8325 | 0.829783 |
| 6 | CNN linear w batch_norm | 0.8210 | 0.516410 | 0.823537 | 0.8210 | 0.818529 |
| 7 | linear batchnorm high epochs w rotated data aug | 0.8301 | 0.491786 | 0.829276 | 0.8301 | 0.828320 |
X_train_rotate = X_train.copy()
y_train_rotate = y_train.copy()
# define data preparation
datagen = ImageDataGenerator(rotation_range=90)
# fit parameters from data
datagen.fit(X_train_rotate)
# configure batch size and retrieve one batch of images
for X_batch, y_batch in datagen.flow(X_train_rotate, y_train_rotate, batch_size=9, shuffle=False):
# create a grid of 3x3 images
fig, ax = plt.subplots(3, 3, sharex=True, sharey=True, figsize=(4,4))
for i in range(3):
for j in range(3):
ax[i][j].imshow(X_batch[i*3+j], cmap=plt.get_cmap("gray"))
# show the plot
plt.show()
break
X_train_rotated = datagen.flow(X_train_rotate, y_train_rotate, batch_size=X_train_rotate.shape[0], shuffle=False).next()
X_train_rotated = X_train_rotated[0]
# merge rotated data with original
X_train_rot = np.concatenate((X_train,X_train_rotated),axis=0)
y_train_rot = np.concatenate((y_train,y_train_rotate),axis=0)
print('after',X_train.shape,X_test.shape)
print('after',y_train.shape,y_test.shape)
plt.imshow(X_train_rot[12])
plt.show()
after (48000, 28, 28, 1) (10000, 28, 28, 1) after (48000, 10) (10000, 10)
X_train_shift = X_train.copy()
y_train_shift = y_train.copy()
# define data preparation
datagen = ImageDataGenerator(width_shift_range=[-2,2], height_shift_range=[-2,2])
# fit parameters from data
datagen.fit(X_train_shift)
# configure batch size and retrieve one batch of images
for X_batch, y_batch in datagen.flow(X_train_shift, y_train_shift, batch_size=9, shuffle=False):
# create a grid of 3x3 images
fig, ax = plt.subplots(3, 3, sharex=True, sharey=True, figsize=(4,4))
for i in range(3):
for j in range(3):
ax[i][j].imshow(X_batch[i*3+j], cmap=plt.get_cmap("gray"))
# show the plot
plt.show()
break
X_train_shifted = datagen.flow(X_train_shift, y_train_shift, batch_size=X_train_shift.shape[0], shuffle=False).next()
X_train_shifted = X_train_shifted[0]
# merge rotated data with original
X_train_shif = np.concatenate((X_train,X_train_shifted),axis=0)
y_train_shif = np.concatenate((y_train,y_train_shift),axis=0)
print('after',X_train.shape,X_test.shape)
print('after',y_train.shape,y_test.shape)
after (48000, 28, 28, 1) (10000, 28, 28, 1) after (48000, 10) (10000, 10)
X_train_flip = X_train.copy()
y_train_flip = y_train.copy()
# define data preparation
datagen = ImageDataGenerator(horizontal_flip=True, vertical_flip=True)
# fit parameters from data
datagen.fit(X_train_flip)
# configure batch size and retrieve one batch of images
for X_batch, y_batch in datagen.flow(X_train_flip, y_train_flip, batch_size=9, shuffle=False):
# create a grid of 3x3 images
fig, ax = plt.subplots(3, 3, sharex=True, sharey=True, figsize=(4,4))
for i in range(3):
for j in range(3):
ax[i][j].imshow(X_batch[i*3+j], cmap=plt.get_cmap("gray"))
# show the plot
plt.show()
break
X_train_flipped = datagen.flow(X_train_flip, y_train_flip, batch_size=X_train_flip.shape[0], shuffle=False).next()
X_train_flipped = X_train_flipped[0]
# merge flipped data with original
X_train_flips = np.concatenate((X_train,X_train_flipped),axis=0)
y_train_flips = np.concatenate((y_train,y_train_flip),axis=0)
print('after',X_train.shape,X_test.shape)
print('after',y_train.shape,y_test.shape)
after (48000, 28, 28, 1) (10000, 28, 28, 1) after (48000, 10) (10000, 10)
# concat flip shift and rotate
X_train = np.concatenate((X_train,X_train_shifted,X_train_rotated),axis=0)
y_train = np.concatenate((y_train,y_train_shift,y_train_rotate),axis=0)
print('total data points after adding all:',X_train.shape,X_test.shape)
total data points after adding all: (144000, 28, 28, 1) (10000, 28, 28, 1)

from tensorflow.keras.layers import Conv2D,Flatten
from numpy import mean
from numpy import std
from matplotlib import pyplot
from sklearn.model_selection import KFold
from keras.datasets import fashion_mnist
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.optimizers import SGD
from keras.layers import Dropout
from keras.layers import BatchNormalization
from keras.layers import Activation
from keras.layers import LeakyReLU
def model_relu():
fashion_model = Sequential()
fashion_model.add(Conv2D(32, kernel_size=(3, 3),activation='relu',input_shape=(28,28,1)))
fashion_model.add(MaxPooling2D((2, 2)))
fashion_model.add(Conv2D(64, (3, 3), activation='relu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Conv2D(128, (3, 3), activation='relu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Flatten())
fashion_model.add(Dense(128, activation='relu'))
fashion_model.add(Dense(num_classes, activation='softmax'))
fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
return fashion_model
run_test_harness(model_relu(),X_train, y_train)
> 89.417 > 89.225 > 89.192 > 89.175 > 88.558
Accuracy: mean=89.113 std=0.291, n=5
def model_tanh():
fashion_model = Sequential()
fashion_model.add(Conv2D(32, kernel_size=(3, 3),activation='tanh',input_shape=(28,28,1)))
fashion_model.add(MaxPooling2D((2, 2)))
fashion_model.add(Conv2D(64, (3, 3), activation='tanh'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Conv2D(128, (3, 3), activation='tanh'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Flatten())
fashion_model.add(Dense(128, activation='linear'))
fashion_model.add(Dense(num_classes, activation='softmax'))
fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model.summary()
return fashion_model
run_test_harness(model_tanh(),X_train, y_train)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 26, 26, 32) 320
max_pooling2d (MaxPooling2D (None, 13, 13, 32) 0
)
conv2d_1 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_1 (MaxPooling (None, 5, 5, 64) 0
2D)
conv2d_2 (Conv2D) (None, 3, 3, 128) 73856
max_pooling2d_2 (MaxPooling (None, 1, 1, 128) 0
2D)
flatten (Flatten) (None, 128) 0
dense (Dense) (None, 128) 16512
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 110,474
Trainable params: 110,474
Non-trainable params: 0
_________________________________________________________________
> 88.067
> 87.825
> 87.708
> 87.350
> 86.658
Accuracy: mean=87.522 std=0.490, n=5
from keras.layers import LeakyReLU
# fix random seed for reproducibility
def model_leaky_relu():
seed = 1
np.random.seed(seed)
fashion_model = Sequential()
fashion_model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1),padding='same'))
fashion_model.add(LeakyReLU(alpha=0.1))
fashion_model.add(MaxPooling2D((2, 2),padding='same'))
fashion_model.add(Conv2D(64, (3, 3), activation='linear',padding='same'))
fashion_model.add(LeakyReLU(alpha=0.1))
fashion_model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
fashion_model.add(Conv2D(128, (3, 3), activation='linear',padding='same'))
fashion_model.add(LeakyReLU(alpha=0.1))
fashion_model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
fashion_model.add(Flatten())
fashion_model.add(Dense(128, activation='linear'))
fashion_model.add(LeakyReLU(alpha=0.1))
fashion_model.add(Dense(num_classes, activation='softmax'))
fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model.summary()
return fashion_model
run_test_harness(model_leaky_relu(),X_train, y_train)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 28, 28, 32) 320
leaky_re_lu (LeakyReLU) (None, 28, 28, 32) 0
max_pooling2d (MaxPooling2D (None, 14, 14, 32) 0
)
conv2d_1 (Conv2D) (None, 14, 14, 64) 18496
leaky_re_lu_1 (LeakyReLU) (None, 14, 14, 64) 0
max_pooling2d_1 (MaxPooling (None, 7, 7, 64) 0
2D)
conv2d_2 (Conv2D) (None, 7, 7, 128) 73856
leaky_re_lu_2 (LeakyReLU) (None, 7, 7, 128) 0
max_pooling2d_2 (MaxPooling (None, 4, 4, 128) 0
2D)
flatten (Flatten) (None, 2048) 0
dense (Dense) (None, 128) 262272
leaky_re_lu_3 (LeakyReLU) (None, 128) 0
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 356,234
Trainable params: 356,234
Non-trainable params: 0
_________________________________________________________________
> 91.450
> 91.700
> 91.642
> 92.017
> 91.658
Accuracy: mean=91.693 std=0.183, n=5
def model_selu():
fashion_model = Sequential()
fashion_model.add(Conv2D(32, kernel_size=(3, 3),activation='selu',input_shape=(28,28,1)))
fashion_model.add(MaxPooling2D((2, 2)))
fashion_model.add(Conv2D(64, (3, 3), activation='selu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Conv2D(128, (3, 3), activation='selu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Flatten())
fashion_model.add(Dense(128, activation='selu'))
fashion_model.add(Dense(num_classes, activation='softmax'))
fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model.summary()
return fashion_model
run_test_harness(model_selu(),X_train, y_train)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 26, 26, 32) 320
max_pooling2d (MaxPooling2D (None, 13, 13, 32) 0
)
conv2d_1 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_1 (MaxPooling (None, 5, 5, 64) 0
2D)
conv2d_2 (Conv2D) (None, 3, 3, 128) 73856
max_pooling2d_2 (MaxPooling (None, 1, 1, 128) 0
2D)
flatten (Flatten) (None, 128) 0
dense (Dense) (None, 128) 16512
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 110,474
Trainable params: 110,474
Non-trainable params: 0
_________________________________________________________________
> 87.958
> 88.275
> 88.017
> 87.958
> 87.508
Accuracy: mean=87.943 std=0.247, n=5
def model_elu():
fashion_model = Sequential()
fashion_model.add(Conv2D(32, kernel_size=(3, 3),activation='elu',input_shape=(28,28,1)))
fashion_model.add(MaxPooling2D((2, 2)))
fashion_model.add(Conv2D(64, (3, 3), activation='elu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Conv2D(128, (3, 3), activation='elu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Flatten())
fashion_model.add(Dense(128, activation='elu'))
fashion_model.add(Dense(num_classes, activation='softmax'))
fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model.summary()
return fashion_model
run_test_harness(model_elu(),X_train, y_train)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 26, 26, 32) 320
max_pooling2d (MaxPooling2D (None, 13, 13, 32) 0
)
conv2d_1 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_1 (MaxPooling (None, 5, 5, 64) 0
2D)
conv2d_2 (Conv2D) (None, 3, 3, 128) 73856
max_pooling2d_2 (MaxPooling (None, 1, 1, 128) 0
2D)
flatten (Flatten) (None, 128) 0
dense (Dense) (None, 128) 16512
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 110,474
Trainable params: 110,474
Non-trainable params: 0
_________________________________________________________________
> 89.192
> 89.042
> 88.450
> 88.250
> 88.450
Accuracy: mean=88.677 std=0.370, n=5
Dropout is randomly removing neurons in a layer but when predicting neurons is added back in. How can something random help in AI we may wonder...

In the picture above,if a bad node is blocked it helps the training process. However if a good node is blocked , the neural network can still learn from a separate representation of the data though not the best but its still fine
As our accuracy goes above 90 percent , we may tend to overfit and here we will try drop out to see if it can regularize and perhaps even improve validation scores
fashion_model = Sequential()
fashion_model.add(Conv2D(32, kernel_size=(3, 3),activation='relu',input_shape=(28,28,1)))
fashion_model.add(MaxPooling2D((2, 2)))
fashion_model.add(Conv2D(64, (3, 3), activation='relu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Conv2D(128, (3, 3), activation='relu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Flatten())
fashion_model.add(Dense(128, activation='relu'))
fashion_model.add(Dropout(0.2))
fashion_model.add(Dense(num_classes, activation='softmax'))
fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model.summary()
fashion_model.fit(X_train, y_train, batch_size=32,epochs=100,verbose=1,validation_data=(X_test, y_test))
test_loss, test_acc = fashion_model.evaluate(X_test, y_test)
# Dataframe to keep track of all model scores
precision,recall,f1 = get_metrics(fashion_model,X_test)
Model_scores = pd.concat([Model_scores,pd.DataFrame([['CNN relu adam w dropout',test_acc,test_loss,precision,recall,f1]],columns=Model_scores.columns)],ignore_index=True)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 26, 26, 32) 320
max_pooling2d (MaxPooling2D (None, 13, 13, 32) 0
)
conv2d_1 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_1 (MaxPooling (None, 5, 5, 64) 0
2D)
conv2d_2 (Conv2D) (None, 3, 3, 128) 73856
max_pooling2d_2 (MaxPooling (None, 1, 1, 128) 0
2D)
flatten (Flatten) (None, 128) 0
dense (Dense) (None, 128) 16512
dropout (Dropout) (None, 128) 0
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 110,474
Trainable params: 110,474
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.7582 - accuracy: 0.7217 - val_loss: 0.4531 - val_accuracy: 0.8358
Epoch 2/100
4500/4500 [==============================] - 18s 4ms/step - loss: 0.5136 - accuracy: 0.8131 - val_loss: 0.3683 - val_accuracy: 0.8625
Epoch 3/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.4465 - accuracy: 0.8363 - val_loss: 0.3617 - val_accuracy: 0.8675
Epoch 4/100
4500/4500 [==============================] - 18s 4ms/step - loss: 0.4067 - accuracy: 0.8505 - val_loss: 0.3418 - val_accuracy: 0.8812
Epoch 5/100
4500/4500 [==============================] - 18s 4ms/step - loss: 0.3784 - accuracy: 0.8611 - val_loss: 0.3361 - val_accuracy: 0.8853
Epoch 6/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.3563 - accuracy: 0.8678 - val_loss: 0.3465 - val_accuracy: 0.8774
Epoch 7/100
4500/4500 [==============================] - 18s 4ms/step - loss: 0.3368 - accuracy: 0.8749 - val_loss: 0.3548 - val_accuracy: 0.8804
Epoch 8/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.3226 - accuracy: 0.8798 - val_loss: 0.3395 - val_accuracy: 0.8860
Epoch 9/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.3078 - accuracy: 0.8851 - val_loss: 0.3366 - val_accuracy: 0.8863
Epoch 10/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.2963 - accuracy: 0.8889 - val_loss: 0.3529 - val_accuracy: 0.8836
Epoch 11/100
4500/4500 [==============================] - 18s 4ms/step - loss: 0.2858 - accuracy: 0.8926 - val_loss: 0.3643 - val_accuracy: 0.8854
Epoch 12/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.2745 - accuracy: 0.8964 - val_loss: 0.3462 - val_accuracy: 0.8837
Epoch 13/100
4500/4500 [==============================] - 18s 4ms/step - loss: 0.2659 - accuracy: 0.8994 - val_loss: 0.3659 - val_accuracy: 0.8857
Epoch 14/100
4500/4500 [==============================] - 18s 4ms/step - loss: 0.2597 - accuracy: 0.9017 - val_loss: 0.3792 - val_accuracy: 0.8793
Epoch 15/100
4500/4500 [==============================] - 18s 4ms/step - loss: 0.2498 - accuracy: 0.9059 - val_loss: 0.4038 - val_accuracy: 0.8788
Epoch 16/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.2423 - accuracy: 0.9084 - val_loss: 0.3843 - val_accuracy: 0.8835
Epoch 17/100
4500/4500 [==============================] - 18s 4ms/step - loss: 0.2370 - accuracy: 0.9089 - val_loss: 0.4180 - val_accuracy: 0.8795
Epoch 18/100
4500/4500 [==============================] - 18s 4ms/step - loss: 0.2333 - accuracy: 0.9110 - val_loss: 0.4002 - val_accuracy: 0.8834
Epoch 19/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.2251 - accuracy: 0.9136 - val_loss: 0.4074 - val_accuracy: 0.8862
Epoch 20/100
4500/4500 [==============================] - 18s 4ms/step - loss: 0.2202 - accuracy: 0.9160 - val_loss: 0.4329 - val_accuracy: 0.8822
Epoch 21/100
4500/4500 [==============================] - 18s 4ms/step - loss: 0.2166 - accuracy: 0.9168 - val_loss: 0.4348 - val_accuracy: 0.8808
Epoch 22/100
4500/4500 [==============================] - 18s 4ms/step - loss: 0.2124 - accuracy: 0.9187 - val_loss: 0.4627 - val_accuracy: 0.8791
Epoch 23/100
4500/4500 [==============================] - 18s 4ms/step - loss: 0.2072 - accuracy: 0.9198 - val_loss: 0.4504 - val_accuracy: 0.8797
Epoch 24/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.2037 - accuracy: 0.9213 - val_loss: 0.4894 - val_accuracy: 0.8785
Epoch 25/100
4500/4500 [==============================] - 18s 4ms/step - loss: 0.1983 - accuracy: 0.9238 - val_loss: 0.4781 - val_accuracy: 0.8820
Epoch 26/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.1962 - accuracy: 0.9242 - val_loss: 0.4856 - val_accuracy: 0.8785
Epoch 27/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.1943 - accuracy: 0.9246 - val_loss: 0.4966 - val_accuracy: 0.8882
Epoch 28/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.1882 - accuracy: 0.9281 - val_loss: 0.4843 - val_accuracy: 0.8846
Epoch 29/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.1870 - accuracy: 0.9269 - val_loss: 0.5047 - val_accuracy: 0.8802
Epoch 30/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.1838 - accuracy: 0.9287 - val_loss: 0.5187 - val_accuracy: 0.8873
Epoch 31/100
4500/4500 [==============================] - 18s 4ms/step - loss: 0.1793 - accuracy: 0.9308 - val_loss: 0.5635 - val_accuracy: 0.8791
Epoch 32/100
4500/4500 [==============================] - 18s 4ms/step - loss: 0.1798 - accuracy: 0.9303 - val_loss: 0.5362 - val_accuracy: 0.8772
Epoch 33/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.1745 - accuracy: 0.9327 - val_loss: 0.5705 - val_accuracy: 0.8728
Epoch 34/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.1734 - accuracy: 0.9329 - val_loss: 0.5501 - val_accuracy: 0.8781
Epoch 35/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.1717 - accuracy: 0.9332 - val_loss: 0.5467 - val_accuracy: 0.8807
Epoch 36/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.1693 - accuracy: 0.9351 - val_loss: 0.6154 - val_accuracy: 0.8724
Epoch 37/100
4500/4500 [==============================] - 18s 4ms/step - loss: 0.1662 - accuracy: 0.9357 - val_loss: 0.5850 - val_accuracy: 0.8746
Epoch 38/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.1652 - accuracy: 0.9362 - val_loss: 0.5824 - val_accuracy: 0.8788
Epoch 39/100
4500/4500 [==============================] - 18s 4ms/step - loss: 0.1614 - accuracy: 0.9370 - val_loss: 0.6339 - val_accuracy: 0.8807
Epoch 40/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.1613 - accuracy: 0.9378 - val_loss: 0.6436 - val_accuracy: 0.8839
Epoch 41/100
4500/4500 [==============================] - 18s 4ms/step - loss: 0.1628 - accuracy: 0.9373 - val_loss: 0.5807 - val_accuracy: 0.8823
Epoch 42/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.1571 - accuracy: 0.9393 - val_loss: 0.6310 - val_accuracy: 0.8823
Epoch 43/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.1563 - accuracy: 0.9392 - val_loss: 0.6180 - val_accuracy: 0.8800
Epoch 44/100
4500/4500 [==============================] - 18s 4ms/step - loss: 0.1556 - accuracy: 0.9403 - val_loss: 0.6323 - val_accuracy: 0.8833
Epoch 45/100
4500/4500 [==============================] - 18s 4ms/step - loss: 0.1544 - accuracy: 0.9400 - val_loss: 0.6491 - val_accuracy: 0.8818
Epoch 46/100
4500/4500 [==============================] - 18s 4ms/step - loss: 0.1517 - accuracy: 0.9424 - val_loss: 0.6512 - val_accuracy: 0.8821
Epoch 47/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.1501 - accuracy: 0.9423 - val_loss: 0.6437 - val_accuracy: 0.8773
Epoch 48/100
4500/4500 [==============================] - 18s 4ms/step - loss: 0.1476 - accuracy: 0.9439 - val_loss: 0.6431 - val_accuracy: 0.8745
Epoch 49/100
4500/4500 [==============================] - 18s 4ms/step - loss: 0.1477 - accuracy: 0.9429 - val_loss: 0.6833 - val_accuracy: 0.8797
Epoch 50/100
4500/4500 [==============================] - 18s 4ms/step - loss: 0.1465 - accuracy: 0.9437 - val_loss: 0.6584 - val_accuracy: 0.8829
Epoch 51/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.1444 - accuracy: 0.9441 - val_loss: 0.7164 - val_accuracy: 0.8755
Epoch 52/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.1473 - accuracy: 0.9437 - val_loss: 0.6934 - val_accuracy: 0.8795
Epoch 53/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.1403 - accuracy: 0.9455 - val_loss: 0.7207 - val_accuracy: 0.8757
Epoch 54/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.1427 - accuracy: 0.9454 - val_loss: 0.7651 - val_accuracy: 0.8728
Epoch 55/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.1407 - accuracy: 0.9462 - val_loss: 0.7268 - val_accuracy: 0.8772
Epoch 56/100
4500/4500 [==============================] - 18s 4ms/step - loss: 0.1393 - accuracy: 0.9467 - val_loss: 0.7324 - val_accuracy: 0.8775
Epoch 57/100
4500/4500 [==============================] - 19s 4ms/step - loss: 0.1354 - accuracy: 0.9480 - val_loss: 0.7867 - val_accuracy: 0.8752
Epoch 58/100
4500/4500 [==============================] - 18s 4ms/step - loss: 0.1347 - accuracy: 0.9487 - val_loss: 0.7985 - val_accuracy: 0.8743
Epoch 59/100
4500/4500 [==============================] - 18s 4ms/step - loss: 0.1345 - accuracy: 0.9486 - val_loss: 0.7779 - val_accuracy: 0.8800
Epoch 60/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1377 - accuracy: 0.9478 - val_loss: 0.7708 - val_accuracy: 0.8746
Epoch 61/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.1324 - accuracy: 0.9491 - val_loss: 0.7724 - val_accuracy: 0.8766
Epoch 62/100
4500/4500 [==============================] - 23s 5ms/step - loss: 0.1311 - accuracy: 0.9497 - val_loss: 0.8121 - val_accuracy: 0.8795
Epoch 63/100
4500/4500 [==============================] - 23s 5ms/step - loss: 0.1319 - accuracy: 0.9503 - val_loss: 0.8296 - val_accuracy: 0.8770
Epoch 64/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.1313 - accuracy: 0.9501 - val_loss: 0.8243 - val_accuracy: 0.8775
Epoch 65/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.1310 - accuracy: 0.9504 - val_loss: 0.8566 - val_accuracy: 0.8755
Epoch 66/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.1287 - accuracy: 0.9514 - val_loss: 0.8480 - val_accuracy: 0.8765
Epoch 67/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.1285 - accuracy: 0.9520 - val_loss: 0.8151 - val_accuracy: 0.8757
Epoch 68/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.1261 - accuracy: 0.9520 - val_loss: 0.8458 - val_accuracy: 0.8777
Epoch 69/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.1263 - accuracy: 0.9522 - val_loss: 0.9450 - val_accuracy: 0.8768
Epoch 70/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.1270 - accuracy: 0.9519 - val_loss: 0.8519 - val_accuracy: 0.8761
Epoch 71/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.1227 - accuracy: 0.9537 - val_loss: 0.8573 - val_accuracy: 0.8784
Epoch 72/100
4500/4500 [==============================] - 23s 5ms/step - loss: 0.1275 - accuracy: 0.9521 - val_loss: 0.9464 - val_accuracy: 0.8778
Epoch 73/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1243 - accuracy: 0.9535 - val_loss: 0.8969 - val_accuracy: 0.8777
Epoch 74/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1222 - accuracy: 0.9539 - val_loss: 0.9736 - val_accuracy: 0.8675
Epoch 75/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1249 - accuracy: 0.9530 - val_loss: 0.9710 - val_accuracy: 0.8721
Epoch 76/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1227 - accuracy: 0.9544 - val_loss: 0.9653 - val_accuracy: 0.8714
Epoch 77/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1204 - accuracy: 0.9551 - val_loss: 0.9612 - val_accuracy: 0.8814
Epoch 78/100
4500/4500 [==============================] - 20s 4ms/step - loss: 0.1226 - accuracy: 0.9539 - val_loss: 0.8822 - val_accuracy: 0.8770
Epoch 79/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.1188 - accuracy: 0.9555 - val_loss: 0.8831 - val_accuracy: 0.8772
Epoch 80/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.1190 - accuracy: 0.9553 - val_loss: 0.9547 - val_accuracy: 0.8752
Epoch 81/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.1171 - accuracy: 0.9561 - val_loss: 0.9344 - val_accuracy: 0.8769
Epoch 82/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.1175 - accuracy: 0.9562 - val_loss: 0.9463 - val_accuracy: 0.8744
Epoch 83/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.1157 - accuracy: 0.9569 - val_loss: 0.9458 - val_accuracy: 0.8764
Epoch 84/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.1182 - accuracy: 0.9556 - val_loss: 0.9314 - val_accuracy: 0.8747
Epoch 85/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.1138 - accuracy: 0.9572 - val_loss: 1.0168 - val_accuracy: 0.8767
Epoch 86/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.1194 - accuracy: 0.9558 - val_loss: 0.9397 - val_accuracy: 0.8737
Epoch 87/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.1133 - accuracy: 0.9575 - val_loss: 1.0238 - val_accuracy: 0.8753
Epoch 88/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.1152 - accuracy: 0.9572 - val_loss: 0.9559 - val_accuracy: 0.8738
Epoch 89/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.1169 - accuracy: 0.9574 - val_loss: 1.0175 - val_accuracy: 0.8743
Epoch 90/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.1122 - accuracy: 0.9581 - val_loss: 1.0666 - val_accuracy: 0.8748
Epoch 91/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.1148 - accuracy: 0.9581 - val_loss: 1.0382 - val_accuracy: 0.8697
Epoch 92/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.1142 - accuracy: 0.9584 - val_loss: 1.0335 - val_accuracy: 0.8727
Epoch 93/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.1132 - accuracy: 0.9579 - val_loss: 1.0185 - val_accuracy: 0.8754
Epoch 94/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.1119 - accuracy: 0.9590 - val_loss: 1.0660 - val_accuracy: 0.8726
Epoch 95/100
4500/4500 [==============================] - 21s 5ms/step - loss: 0.1115 - accuracy: 0.9587 - val_loss: 1.0607 - val_accuracy: 0.8794
Epoch 96/100
4500/4500 [==============================] - 23s 5ms/step - loss: 0.1105 - accuracy: 0.9589 - val_loss: 1.0217 - val_accuracy: 0.8772
Epoch 97/100
4500/4500 [==============================] - 23s 5ms/step - loss: 0.1115 - accuracy: 0.9592 - val_loss: 1.0496 - val_accuracy: 0.8737
Epoch 98/100
4500/4500 [==============================] - 23s 5ms/step - loss: 0.1080 - accuracy: 0.9600 - val_loss: 1.0028 - val_accuracy: 0.8737
Epoch 99/100
4500/4500 [==============================] - 23s 5ms/step - loss: 0.1123 - accuracy: 0.9591 - val_loss: 1.1353 - val_accuracy: 0.8699
Epoch 100/100
4500/4500 [==============================] - 23s 5ms/step - loss: 0.1106 - accuracy: 0.9594 - val_loss: 1.1093 - val_accuracy: 0.8721
313/313 [==============================] - 1s 3ms/step - loss: 1.1093 - accuracy: 0.8721
313/313 [==============================] - 1s 2ms/step
Adding drop out does improve performance

Instead of computing the gradients over the entire dataset, it performs a parameter update for each example in the dataset.The problem of SGD is that the updates are frequent and with a high variance, so the objective function heavily fluctuates during training.This fluctuation can be an advantage with respect to batch gradient descent because it allows the function to jump to better local minima, but at the same time it can represent a disadvantage with respect to the convergence in a specific local minima.
def model_sgd():
fashion_model = Sequential()
fashion_model.add(Conv2D(32, kernel_size=(3, 3),activation='relu',input_shape=(28,28,1)))
fashion_model.add(MaxPooling2D((2, 2)))
fashion_model.add(Conv2D(64, (3, 3), activation='relu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Conv2D(128, (3, 3), activation='relu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Flatten())
fashion_model.add(Dense(128, activation='relu'))
fashion_model.add(Dropout(0.2))
fashion_model.add(Dense(num_classes, activation='softmax'))
fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.SGD(),metrics=['accuracy'])
fashion_model.summary()
return fashion_model
run_test_harness(model_sgd(),X_train, y_train)
Model: "sequential_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_3 (Conv2D) (None, 26, 26, 32) 320
max_pooling2d_3 (MaxPooling (None, 13, 13, 32) 0
2D)
conv2d_4 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_4 (MaxPooling (None, 5, 5, 64) 0
2D)
conv2d_5 (Conv2D) (None, 3, 3, 128) 73856
max_pooling2d_5 (MaxPooling (None, 1, 1, 128) 0
2D)
flatten_1 (Flatten) (None, 128) 0
dense_2 (Dense) (None, 128) 16512
dropout_1 (Dropout) (None, 128) 0
dense_3 (Dense) (None, 10) 1290
=================================================================
Total params: 110,474
Trainable params: 110,474
Non-trainable params: 0
_________________________________________________________________
> 87.883
> 87.800
> 88.100
> 88.008
> 88.375
Accuracy: mean=88.033 std=0.199, n=5
It adapts the learning rate to the parameters performing small updates for frequently occurring features and large updates for the rarest ones.In this way, the network is able to capture information belonging to features that are not frequent, putting them in evidence and giving them the right weight.The problem of Adagrad is that it adjusts the learning rate for each parameter according to all the past gradients. So, the possibility of having a very small learning rate after a high number of steps — resulting from the accumulation of all the past gradients — is relevant.If the learning rate is too much small, we simply can’t update weights and the consequence is that the network doesn’t learn anymore.
def model_adagrad():
fashion_model = Sequential()
fashion_model.add(Conv2D(32, kernel_size=(3, 3),activation='relu',input_shape=(28,28,1)))
fashion_model.add(MaxPooling2D((2, 2)))
fashion_model.add(Conv2D(64, (3, 3), activation='relu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Conv2D(128, (3, 3), activation='relu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Flatten())
fashion_model.add(Dense(128, activation='relu'))
fashion_model.add(Dropout(0.2))
fashion_model.add(Dense(num_classes, activation='softmax'))
fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adagrad(),metrics=['accuracy'])
fashion_model.summary()
return fashion_model
run_test_harness(model_adagrad(),X_train, y_train)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 26, 26, 32) 320
max_pooling2d (MaxPooling2D (None, 13, 13, 32) 0
)
conv2d_1 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_1 (MaxPooling (None, 5, 5, 64) 0
2D)
conv2d_2 (Conv2D) (None, 3, 3, 128) 73856
max_pooling2d_2 (MaxPooling (None, 1, 1, 128) 0
2D)
flatten (Flatten) (None, 128) 0
dense (Dense) (None, 128) 16512
dropout (Dropout) (None, 128) 0
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 110,474
Trainable params: 110,474
Non-trainable params: 0
_________________________________________________________________
> 79.500
> 82.292
> 83.558
> 84.250
> 84.750
Accuracy: mean=82.870 std=1.876, n=5
It improves the previous algorithm by introducing a history window which sets a fixed number of past gradients to take in consideration during the training.In this way, we don’t have the problem of the vanishing learning rate.
def model_adadelta():
fashion_model = Sequential()
fashion_model.add(Conv2D(32, kernel_size=(3, 3),activation='relu',input_shape=(28,28,1)))
fashion_model.add(MaxPooling2D((2, 2)))
fashion_model.add(Conv2D(64, (3, 3), activation='relu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Conv2D(128, (3, 3), activation='relu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Flatten())
fashion_model.add(Dense(128, activation='relu'))
fashion_model.add(Dropout(0.2))
fashion_model.add(Dense(num_classes, activation='softmax'))
fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adadelta(),metrics=['accuracy'])
fashion_model.summary()
return fashion_model
run_test_harness(model_adadelta(),X_train, y_train)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 26, 26, 32) 320
max_pooling2d (MaxPooling2D (None, 13, 13, 32) 0
)
conv2d_1 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_1 (MaxPooling (None, 5, 5, 64) 0
2D)
conv2d_2 (Conv2D) (None, 3, 3, 128) 73856
max_pooling2d_2 (MaxPooling (None, 1, 1, 128) 0
2D)
flatten (Flatten) (None, 128) 0
dense (Dense) (None, 128) 16512
dropout (Dropout) (None, 128) 0
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 110,474
Trainable params: 110,474
Non-trainable params: 0
_________________________________________________________________
> 72.942
> 75.433
> 77.200
> 78.517
> 79.675
Accuracy: mean=76.753 std=2.371, n=5
It is very similar to Adadelta. The only difference is in the way they manage the past gradients.
def model_rmsprop():
fashion_model = Sequential()
fashion_model.add(Conv2D(32, kernel_size=(3, 3),activation='relu',input_shape=(28,28,1)))
fashion_model.add(MaxPooling2D((2, 2)))
fashion_model.add(Conv2D(64, (3, 3), activation='relu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Conv2D(128, (3, 3), activation='relu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Flatten())
fashion_model.add(Dense(128, activation='relu'))
fashion_model.add(Dropout(0.2))
fashion_model.add(Dense(num_classes, activation='softmax'))
fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.RMSprop(),metrics=['accuracy'])
fashion_model.summary()
return fashion_model
run_test_harness(model_rmsprop(),X_train, y_train)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 26, 26, 32) 320
max_pooling2d (MaxPooling2D (None, 13, 13, 32) 0
)
conv2d_1 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_1 (MaxPooling (None, 5, 5, 64) 0
2D)
conv2d_2 (Conv2D) (None, 3, 3, 128) 73856
max_pooling2d_2 (MaxPooling (None, 1, 1, 128) 0
2D)
flatten (Flatten) (None, 128) 0
dense (Dense) (None, 128) 16512
dropout (Dropout) (None, 128) 0
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 110,474
Trainable params: 110,474
Non-trainable params: 0
_________________________________________________________________
> 88.133
> 88.275
> 88.308
> 88.300
> 87.867
Accuracy: mean=88.177 std=0.167, n=5
It adds to the advantages of Adadelta and RMSprop, the storing of an exponentially decaying average of past gradients similar to momentum.
def model_adam():
fashion_model = Sequential()
fashion_model.add(Conv2D(32, kernel_size=(3, 3),activation='relu',input_shape=(28,28,1)))
fashion_model.add(MaxPooling2D((2, 2)))
fashion_model.add(Conv2D(64, (3, 3), activation='relu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Conv2D(128, (3, 3), activation='relu'))
fashion_model.add(MaxPooling2D(pool_size=(2, 2)))
fashion_model.add(Flatten())
fashion_model.add(Dense(128, activation='relu'))
fashion_model.add(Dropout(0.2))
fashion_model.add(Dense(num_classes, activation='softmax'))
fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model.summary()
return fashion_model
run_test_harness(model_adam(),X_train, y_train)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 26, 26, 32) 320
max_pooling2d (MaxPooling2D (None, 13, 13, 32) 0
)
conv2d_1 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_1 (MaxPooling (None, 5, 5, 64) 0
2D)
conv2d_2 (Conv2D) (None, 3, 3, 128) 73856
max_pooling2d_2 (MaxPooling (None, 1, 1, 128) 0
2D)
flatten (Flatten) (None, 128) 0
dense (Dense) (None, 128) 16512
dropout (Dropout) (None, 128) 0
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 110,474
Trainable params: 110,474
Non-trainable params: 0
_________________________________________________________________
> 89.625
> 88.858
> 89.033
> 89.608
> 88.658
Accuracy: mean=89.157 std=0.394, n=5
reduced learning rate on plateau is added and batch size is increased to hasten the compute time and also more epochs with early stopping to see performance of models.

reducing learning rate as val_loss plateaus allows optimizer to more efficiently find minimum in loss surface
from keras.callbacks import ReduceLROnPlateau
### function for looping through all the models
def tune_model_act_opt(act,opt):
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
activation=act,
kernel_initializer='he_normal',
input_shape=(28,28,1)))
model.add(MaxPooling2D((2, 2)))
model.add(BatchNormalization())
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), activation=act))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(128, (3, 3), activation=act))
model.add(BatchNormalization())
model.add(Dropout(0.4))
model.add(Flatten())
model.add(Dense(128, activation=act))
model.add(Dropout(0.3))
model.add(Flatten())
model.add(Dense(10, activation='softmax'))
# compile model
model.compile(loss=keras.losses.categorical_crossentropy, optimizer=opt,metrics=['accuracy'])
return model
# plot diagnostic learning curves
def summarize_diagnostics(histories,act,opt):
# plot loss
pyplot.subplot(211)
pyplot.title(f'Cross Entropy Loss {act},{opt}')
pyplot.plot(histories.history['loss'], color='blue', label='train')
pyplot.plot(histories.history['val_loss'], color='orange', label='test')
# plot accuracy
pyplot.subplot(212)
pyplot.title(f'Classification Accuracy {act},{opt}')
pyplot.plot(histories.history['accuracy'], color='blue', label='train')
pyplot.plot(histories.history['val_accuracy'], color='orange', label='test')
pyplot.legend()
pyplot.show()
def run_test_harness_act_opt(act,opt,X_train, y_train, X_test, y_test,X_val,y_val):
model = tune_model_act_opt(act,opt)
# fit model
h_callback = EarlyStopping(monitor='val_accuracy', patience=5)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
patience=5, min_lr=0.001)
history = model.fit(X_train, y_train, epochs=100, batch_size=128,validation_data=(X_val, y_val), verbose=0 ,callbacks=[h_callback,reduce_lr])
# evaluate model
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
# ran into memory leakage problem trying to fix with this
gc.collect()
tf.keras.backend.clear_session()
del model
print('Test Accuracy> %.3f' % (test_acc * 100.0))
print('Test Loss> %.3f' % (test_loss * 100.0))
summarize_diagnostics(history,act,opt)
return (test_acc*100, test_loss*100)
from itertools import product
optimizers = ['adam', 'sgd', 'rmsprop', 'adagrad', 'adadelta', 'adamax', 'nadam']
activation = ['relu', 'selu', 'elu', 'tanh',keras.layers.LeakyReLU(alpha=0.01)]
accuracy_act_opt = list()
loss_act_opt = list()
activation_names = ['relu', 'selu', 'elu', 'tanh','LeakyReLU']
act_opt = list(product(activation_names,optimizers))
for act,opt in product(activation,optimizers):
acc,loss = run_test_harness_act_opt(act,opt,X_train, y_train, X_test, y_test,X_val,y_val)
accuracy_act_opt.append(acc)
loss_act_opt.append(loss)
Test Accuracy> 90.910 Test Loss> 24.426
Test Accuracy> 88.340 Test Loss> 32.019
Test Accuracy> 88.820 Test Loss> 30.779
Test Accuracy> 84.740 Test Loss> 41.430
Test Accuracy> 78.990 Test Loss> 58.161
Test Accuracy> 91.380 Test Loss> 23.999
Test Accuracy> 90.730 Test Loss> 25.181
Test Accuracy> 89.880 Test Loss> 27.888
Test Accuracy> 86.880 Test Loss> 35.003
Test Accuracy> 91.080 Test Loss> 25.031
Test Accuracy> 80.740 Test Loss> 52.328
Test Accuracy> 72.820 Test Loss> 77.466
Test Accuracy> 91.790 Test Loss> 22.988
Test Accuracy> 90.490 Test Loss> 25.841
Test Accuracy> 89.940 Test Loss> 27.351
Test Accuracy> 88.940 Test Loss> 30.257
Test Accuracy> 90.030 Test Loss> 28.227
Test Accuracy> 81.750 Test Loss> 49.088
Test Accuracy> 78.010 Test Loss> 59.107
Test Accuracy> 90.790 Test Loss> 24.655
Test Accuracy> 90.620 Test Loss> 25.135
Test Accuracy> 89.570 Test Loss> 28.990
Test Accuracy> 88.970 Test Loss> 30.908
Test Accuracy> 88.120 Test Loss> 33.598
Test Accuracy> 81.990 Test Loss> 49.509
Test Accuracy> 74.460 Test Loss> 69.740
Test Accuracy> 90.170 Test Loss> 26.992
Test Accuracy> 88.780 Test Loss> 31.100
Test Accuracy> 91.070 Test Loss> 24.577
Test Accuracy> 88.530 Test Loss> 31.066
Test Accuracy> 90.180 Test Loss> 26.743
Test Accuracy> 85.450 Test Loss> 40.049
Test Accuracy> 77.890 Test Loss> 58.998
Test Accuracy> 90.800 Test Loss> 24.973
Test Accuracy> 89.990 Test Loss> 27.396
# create a dataframe of scores_act_opt and act_opt
df_act_opt = pd.DataFrame({'act_opt':act_opt,'accuracy':accuracy_act_opt,'loss':loss_act_opt})
# highlight highest score_act_opt in df_act_opt
df_act_opt.style.highlight_max(subset=['accuracy'],color='green', axis=0).highlight_min(subset=['loss'],color='green', axis=0)
# style only score_act_opt in df_act_opt
| act_opt | accuracy | loss | |
|---|---|---|---|
| 0 | ('relu', 'adam') | 90.910000 | 24.425581 |
| 1 | ('relu', 'sgd') | 88.340002 | 32.019371 |
| 2 | ('relu', 'rmsprop') | 88.819999 | 30.779332 |
| 3 | ('relu', 'adagrad') | 84.740001 | 41.430226 |
| 4 | ('relu', 'adadelta') | 78.990000 | 58.160585 |
| 5 | ('relu', 'adamax') | 91.380000 | 23.999012 |
| 6 | ('relu', 'nadam') | 90.730000 | 25.180891 |
| 7 | ('selu', 'adam') | 89.880002 | 27.887952 |
| 8 | ('selu', 'sgd') | 86.879998 | 35.003248 |
| 9 | ('selu', 'rmsprop') | 91.079998 | 25.030890 |
| 10 | ('selu', 'adagrad') | 80.739999 | 52.328128 |
| 11 | ('selu', 'adadelta') | 72.820002 | 77.466422 |
| 12 | ('selu', 'adamax') | 91.790003 | 22.987714 |
| 13 | ('selu', 'nadam') | 90.490001 | 25.841162 |
| 14 | ('elu', 'adam') | 89.940000 | 27.350724 |
| 15 | ('elu', 'sgd') | 88.940001 | 30.256987 |
| 16 | ('elu', 'rmsprop') | 90.030003 | 28.227046 |
| 17 | ('elu', 'adagrad') | 81.750000 | 49.087885 |
| 18 | ('elu', 'adadelta') | 78.009999 | 59.106594 |
| 19 | ('elu', 'adamax') | 90.789998 | 24.654898 |
| 20 | ('elu', 'nadam') | 90.619999 | 25.134510 |
| 21 | ('tanh', 'adam') | 89.569998 | 28.989506 |
| 22 | ('tanh', 'sgd') | 88.970000 | 30.908495 |
| 23 | ('tanh', 'rmsprop') | 88.120002 | 33.597705 |
| 24 | ('tanh', 'adagrad') | 81.989998 | 49.509081 |
| 25 | ('tanh', 'adadelta') | 74.460000 | 69.739568 |
| 26 | ('tanh', 'adamax') | 90.170002 | 26.991671 |
| 27 | ('tanh', 'nadam') | 88.779998 | 31.099814 |
| 28 | ('LeakyReLU', 'adam') | 91.070002 | 24.576949 |
| 29 | ('LeakyReLU', 'sgd') | 88.529998 | 31.065875 |
| 30 | ('LeakyReLU', 'rmsprop') | 90.179998 | 26.742953 |
| 31 | ('LeakyReLU', 'adagrad') | 85.450000 | 40.048939 |
| 32 | ('LeakyReLU', 'adadelta') | 77.890003 | 58.998293 |
| 33 | ('LeakyReLU', 'adamax') | 90.799999 | 24.973015 |
| 34 | ('LeakyReLU', 'nadam') | 89.990002 | 27.396315 |
seed = 1
np.random.seed(seed)
fashion_model = Sequential()
fashion_model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',input_shape=(28,28,1),padding='same'))
fashion_model.add(LeakyReLU(alpha=0.1))
fashion_model.add(MaxPooling2D((2, 2),padding='same'))
fashion_model.add(Conv2D(64, (3, 3), activation='linear',padding='same'))
fashion_model.add(LeakyReLU(alpha=0.1))
fashion_model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
fashion_model.add(Conv2D(128, (3, 3), activation='linear',padding='same'))
fashion_model.add(LeakyReLU(alpha=0.1))
fashion_model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
fashion_model.add(Flatten())
fashion_model.add(Dense(128, activation='linear'))
fashion_model.add(LeakyReLU(alpha=0.1))
fashion_model.add(Dense(num_classes, activation='softmax'))
fashion_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
fashion_model.summary()
# fit model
h_callback = EarlyStopping(monitor='val_accuracy', patience=10)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
patience=5, min_lr=0.001)
history = fashion_model.fit(X_train, y_train, epochs=100, batch_size=128,validation_data=(X_val, y_val), verbose=0 ,callbacks=[h_callback,reduce_lr])
# evaluate model
_, acc = fashion_model.evaluate(X_test, y_test, verbose=0)
print('> %.3f' % (acc * 100.0))
summarize_diagnostics(history,act,opt)
# learning curves
summarize_diagnostics(history,act,opt)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 28, 28, 32) 320
leaky_re_lu (LeakyReLU) (None, 28, 28, 32) 0
max_pooling2d (MaxPooling2D (None, 14, 14, 32) 0
)
conv2d_1 (Conv2D) (None, 14, 14, 64) 18496
leaky_re_lu_1 (LeakyReLU) (None, 14, 14, 64) 0
max_pooling2d_1 (MaxPooling (None, 7, 7, 64) 0
2D)
conv2d_2 (Conv2D) (None, 7, 7, 128) 73856
leaky_re_lu_2 (LeakyReLU) (None, 7, 7, 128) 0
max_pooling2d_2 (MaxPooling (None, 4, 4, 128) 0
2D)
flatten (Flatten) (None, 2048) 0
dense (Dense) (None, 128) 262272
leaky_re_lu_3 (LeakyReLU) (None, 128) 0
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 356,234
Trainable params: 356,234
Non-trainable params: 0
_________________________________________________________________
> 91.770

Graph 1. Model with a good fit and high variance. Source: https://www.researchgate.net/publication/332412613
We would like a good fit instead as sometimes there are noisy data that will prevent us from generalizing well, preventing the model from predicting well on real world data
import tensorflow
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
activation='relu',
kernel_initializer='he_normal',
input_shape=(28,28,1)))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(Dropout(0.4))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
model.add(Flatten())
model.add(Dense(10, activation='softmax', kernel_regularizer=tensorflow.keras.regularizers.L1(l1=0.05)))
#loop through the optimizers
model.compile(optimizer='adam',
loss=tf.keras.losses.categorical_crossentropy,
metrics=['accuracy'])
model.summary()
early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
patience=5, min_lr=0.001)
h_callback = model.fit(X_train, y_train, epochs = 100,
validation_data=(X_val, y_val), callbacks=[early_stopping,reduce_lr])
# Plot train vs test loss during training
plot_loss(h_callback.history['loss'], h_callback.history['val_loss'])
plot_accuracy(h_callback.history['accuracy'], h_callback.history['val_accuracy'])
Model: "sequential_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_3 (Conv2D) (None, 26, 26, 32) 320
max_pooling2d_3 (MaxPooling (None, 13, 13, 32) 0
2D)
dropout (Dropout) (None, 13, 13, 32) 0
conv2d_4 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_4 (MaxPooling (None, 5, 5, 64) 0
2D)
dropout_1 (Dropout) (None, 5, 5, 64) 0
conv2d_5 (Conv2D) (None, 3, 3, 128) 73856
dropout_2 (Dropout) (None, 3, 3, 128) 0
flatten_1 (Flatten) (None, 1152) 0
dense_2 (Dense) (None, 128) 147584
dropout_3 (Dropout) (None, 128) 0
flatten_2 (Flatten) (None, 128) 0
dense_3 (Dense) (None, 10) 1290
=================================================================
Total params: 241,546
Trainable params: 241,546
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
4500/4500 [==============================] - 27s 6ms/step - loss: 1.2175 - accuracy: 0.6589 - val_loss: 0.6184 - val_accuracy: 0.8299 - lr: 0.0010
Epoch 2/100
4500/4500 [==============================] - 24s 5ms/step - loss: 0.8421 - accuracy: 0.7502 - val_loss: 0.5624 - val_accuracy: 0.8477 - lr: 0.0010
Epoch 3/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.7639 - accuracy: 0.7798 - val_loss: 0.5089 - val_accuracy: 0.8748 - lr: 0.0010
Epoch 4/100
4500/4500 [==============================] - 23s 5ms/step - loss: 0.7172 - accuracy: 0.7952 - val_loss: 0.4964 - val_accuracy: 0.8662 - lr: 0.0010
Epoch 5/100
4500/4500 [==============================] - 23s 5ms/step - loss: 0.6859 - accuracy: 0.8074 - val_loss: 0.4726 - val_accuracy: 0.8829 - lr: 0.0010
Epoch 6/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.6683 - accuracy: 0.8127 - val_loss: 0.4535 - val_accuracy: 0.8907 - lr: 0.0010
Epoch 7/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.6535 - accuracy: 0.8181 - val_loss: 0.4583 - val_accuracy: 0.8873 - lr: 0.0010
Epoch 8/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.6353 - accuracy: 0.8243 - val_loss: 0.4616 - val_accuracy: 0.8818 - lr: 0.0010
Epoch 9/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.6303 - accuracy: 0.8255 - val_loss: 0.4440 - val_accuracy: 0.8853 - lr: 0.0010
Epoch 10/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.6247 - accuracy: 0.8289 - val_loss: 0.4384 - val_accuracy: 0.8889 - lr: 0.0010
Epoch 11/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.6154 - accuracy: 0.8320 - val_loss: 0.4594 - val_accuracy: 0.8886 - lr: 0.0010
Epoch 12/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.6069 - accuracy: 0.8336 - val_loss: 0.4180 - val_accuracy: 0.8954 - lr: 0.0010
Epoch 13/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.6067 - accuracy: 0.8347 - val_loss: 0.4072 - val_accuracy: 0.9007 - lr: 0.0010
Epoch 14/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.6015 - accuracy: 0.8368 - val_loss: 0.4336 - val_accuracy: 0.8965 - lr: 0.0010
Epoch 15/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.6004 - accuracy: 0.8364 - val_loss: 0.4079 - val_accuracy: 0.8960 - lr: 0.0010
Epoch 16/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5934 - accuracy: 0.8383 - val_loss: 0.4149 - val_accuracy: 0.8944 - lr: 0.0010
Epoch 17/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5906 - accuracy: 0.8408 - val_loss: 0.4129 - val_accuracy: 0.9028 - lr: 0.0010
Epoch 18/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5853 - accuracy: 0.8412 - val_loss: 0.4252 - val_accuracy: 0.8907 - lr: 0.0010
Epoch 19/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5858 - accuracy: 0.8411 - val_loss: 0.4040 - val_accuracy: 0.8995 - lr: 0.0010
Epoch 20/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5851 - accuracy: 0.8412 - val_loss: 0.3884 - val_accuracy: 0.9071 - lr: 0.0010
Epoch 21/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5807 - accuracy: 0.8433 - val_loss: 0.4239 - val_accuracy: 0.9028 - lr: 0.0010
Epoch 22/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5783 - accuracy: 0.8437 - val_loss: 0.4045 - val_accuracy: 0.9055 - lr: 0.0010
Epoch 23/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.5770 - accuracy: 0.8444 - val_loss: 0.4173 - val_accuracy: 0.9055 - lr: 0.0010
Epoch 24/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5720 - accuracy: 0.8455 - val_loss: 0.3756 - val_accuracy: 0.9090 - lr: 0.0010
Epoch 25/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.5731 - accuracy: 0.8455 - val_loss: 0.4036 - val_accuracy: 0.9016 - lr: 0.0010
Epoch 26/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.5707 - accuracy: 0.8468 - val_loss: 0.4193 - val_accuracy: 0.8986 - lr: 0.0010
Epoch 27/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5717 - accuracy: 0.8466 - val_loss: 0.3888 - val_accuracy: 0.9054 - lr: 0.0010
Epoch 28/100
4500/4500 [==============================] - 24s 5ms/step - loss: 0.5702 - accuracy: 0.8471 - val_loss: 0.4025 - val_accuracy: 0.9043 - lr: 0.0010
Epoch 29/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5645 - accuracy: 0.8483 - val_loss: 0.4147 - val_accuracy: 0.9044 - lr: 0.0010
Epoch 30/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5659 - accuracy: 0.8486 - val_loss: 0.4204 - val_accuracy: 0.8984 - lr: 0.0010
Epoch 31/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5625 - accuracy: 0.8495 - val_loss: 0.3861 - val_accuracy: 0.8984 - lr: 0.0010
Epoch 32/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5652 - accuracy: 0.8492 - val_loss: 0.4049 - val_accuracy: 0.9001 - lr: 0.0010
Epoch 33/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5619 - accuracy: 0.8493 - val_loss: 0.4112 - val_accuracy: 0.9050 - lr: 0.0010
Epoch 34/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5579 - accuracy: 0.8507 - val_loss: 0.3724 - val_accuracy: 0.9097 - lr: 0.0010
Epoch 35/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5566 - accuracy: 0.8503 - val_loss: 0.3717 - val_accuracy: 0.9112 - lr: 0.0010
Epoch 36/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5532 - accuracy: 0.8512 - val_loss: 0.3919 - val_accuracy: 0.9078 - lr: 0.0010
Epoch 37/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5583 - accuracy: 0.8503 - val_loss: 0.3996 - val_accuracy: 0.9106 - lr: 0.0010
Epoch 38/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5557 - accuracy: 0.8514 - val_loss: 0.4100 - val_accuracy: 0.9030 - lr: 0.0010
Epoch 39/100
4500/4500 [==============================] - 24s 5ms/step - loss: 0.5535 - accuracy: 0.8519 - val_loss: 0.4067 - val_accuracy: 0.8978 - lr: 0.0010
Epoch 40/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.5528 - accuracy: 0.8520 - val_loss: 0.4190 - val_accuracy: 0.9059 - lr: 0.0010
Epoch 41/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.5499 - accuracy: 0.8537 - val_loss: 0.4073 - val_accuracy: 0.9087 - lr: 0.0010
Epoch 42/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.5559 - accuracy: 0.8515 - val_loss: 0.3873 - val_accuracy: 0.9080 - lr: 0.0010
Epoch 43/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.5513 - accuracy: 0.8523 - val_loss: 0.3894 - val_accuracy: 0.9072 - lr: 0.0010
Epoch 44/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.5496 - accuracy: 0.8541 - val_loss: 0.4043 - val_accuracy: 0.9009 - lr: 0.0010
Epoch 45/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.5482 - accuracy: 0.8536 - val_loss: 0.3694 - val_accuracy: 0.9118 - lr: 0.0010
Epoch 46/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.5510 - accuracy: 0.8544 - val_loss: 0.3914 - val_accuracy: 0.9038 - lr: 0.0010
Epoch 47/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.5517 - accuracy: 0.8535 - val_loss: 0.3853 - val_accuracy: 0.9085 - lr: 0.0010
Epoch 48/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.5477 - accuracy: 0.8553 - val_loss: 0.3822 - val_accuracy: 0.9060 - lr: 0.0010
Epoch 49/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.5469 - accuracy: 0.8545 - val_loss: 0.3920 - val_accuracy: 0.9038 - lr: 0.0010
Epoch 50/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5457 - accuracy: 0.8542 - val_loss: 0.3825 - val_accuracy: 0.9089 - lr: 0.0010
Epoch 51/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5459 - accuracy: 0.8537 - val_loss: 0.3840 - val_accuracy: 0.9087 - lr: 0.0010
Epoch 52/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5455 - accuracy: 0.8544 - val_loss: 0.3833 - val_accuracy: 0.9078 - lr: 0.0010
Epoch 53/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5477 - accuracy: 0.8549 - val_loss: 0.4214 - val_accuracy: 0.8996 - lr: 0.0010
Epoch 54/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5446 - accuracy: 0.8560 - val_loss: 0.4025 - val_accuracy: 0.8976 - lr: 0.0010
Epoch 55/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5459 - accuracy: 0.8555 - val_loss: 0.3847 - val_accuracy: 0.9090 - lr: 0.0010
Epoch 55: early stopping
import tensorflow
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
activation='relu',
kernel_initializer='he_normal',
input_shape=(28,28,1)))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(Dropout(0.4))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
model.add(Flatten())
model.add(Dense(10, activation='softmax', kernel_regularizer=tensorflow.keras.regularizers.L2(l2=0.05)))
#loop through the optimizers
model.compile(optimizer='adam',
loss=tf.keras.losses.categorical_crossentropy,
metrics=['accuracy'])
model.summary()
early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
patience=5, min_lr=0.001)
h_callback = model.fit(X_train, y_train, epochs = 100,
validation_data=(X_val, y_val), callbacks=[early_stopping,reduce_lr])
# Plot train vs test loss during training
plot_loss(h_callback.history['loss'], h_callback.history['val_loss'])
plot_accuracy(h_callback.history['accuracy'], h_callback.history['val_accuracy'])
Model: "sequential_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_6 (Conv2D) (None, 26, 26, 32) 320
max_pooling2d_5 (MaxPooling (None, 13, 13, 32) 0
2D)
dropout_4 (Dropout) (None, 13, 13, 32) 0
conv2d_7 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_6 (MaxPooling (None, 5, 5, 64) 0
2D)
dropout_5 (Dropout) (None, 5, 5, 64) 0
conv2d_8 (Conv2D) (None, 3, 3, 128) 73856
dropout_6 (Dropout) (None, 3, 3, 128) 0
flatten_3 (Flatten) (None, 1152) 0
dense_4 (Dense) (None, 128) 147584
dropout_7 (Dropout) (None, 128) 0
flatten_4 (Flatten) (None, 128) 0
dense_5 (Dense) (None, 10) 1290
=================================================================
Total params: 241,546
Trainable params: 241,546
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.9006 - accuracy: 0.6910 - val_loss: 0.4622 - val_accuracy: 0.8417 - lr: 0.0010
Epoch 2/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.6355 - accuracy: 0.7805 - val_loss: 0.3868 - val_accuracy: 0.8686 - lr: 0.0010
Epoch 3/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.5644 - accuracy: 0.8056 - val_loss: 0.3397 - val_accuracy: 0.8848 - lr: 0.0010
Epoch 4/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.5275 - accuracy: 0.8193 - val_loss: 0.3504 - val_accuracy: 0.8878 - lr: 0.0010
Epoch 5/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.5094 - accuracy: 0.8266 - val_loss: 0.3212 - val_accuracy: 0.8922 - lr: 0.0010
Epoch 6/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.4945 - accuracy: 0.8309 - val_loss: 0.3053 - val_accuracy: 0.8968 - lr: 0.0010
Epoch 7/100
4500/4500 [==============================] - 24s 5ms/step - loss: 0.4802 - accuracy: 0.8354 - val_loss: 0.3133 - val_accuracy: 0.8932 - lr: 0.0010
Epoch 8/100
4500/4500 [==============================] - 25s 5ms/step - loss: 0.4695 - accuracy: 0.8397 - val_loss: 0.2980 - val_accuracy: 0.8974 - lr: 0.0010
Epoch 9/100
4500/4500 [==============================] - 24s 5ms/step - loss: 0.4632 - accuracy: 0.8418 - val_loss: 0.3150 - val_accuracy: 0.8951 - lr: 0.0010
Epoch 10/100
4500/4500 [==============================] - 24s 5ms/step - loss: 0.4578 - accuracy: 0.8442 - val_loss: 0.3047 - val_accuracy: 0.8978 - lr: 0.0010
Epoch 11/100
4500/4500 [==============================] - 25s 5ms/step - loss: 0.4520 - accuracy: 0.8450 - val_loss: 0.3023 - val_accuracy: 0.9029 - lr: 0.0010
Epoch 12/100
4500/4500 [==============================] - 25s 5ms/step - loss: 0.4454 - accuracy: 0.8486 - val_loss: 0.2853 - val_accuracy: 0.9056 - lr: 0.0010
Epoch 13/100
4500/4500 [==============================] - 24s 5ms/step - loss: 0.4432 - accuracy: 0.8490 - val_loss: 0.3158 - val_accuracy: 0.8889 - lr: 0.0010
Epoch 14/100
4500/4500 [==============================] - 25s 5ms/step - loss: 0.4376 - accuracy: 0.8507 - val_loss: 0.2984 - val_accuracy: 0.8988 - lr: 0.0010
Epoch 15/100
4500/4500 [==============================] - 25s 5ms/step - loss: 0.4355 - accuracy: 0.8511 - val_loss: 0.2716 - val_accuracy: 0.9114 - lr: 0.0010
Epoch 16/100
4500/4500 [==============================] - 25s 5ms/step - loss: 0.4336 - accuracy: 0.8513 - val_loss: 0.2908 - val_accuracy: 0.9031 - lr: 0.0010
Epoch 17/100
4500/4500 [==============================] - 23s 5ms/step - loss: 0.4303 - accuracy: 0.8529 - val_loss: 0.2889 - val_accuracy: 0.9033 - lr: 0.0010
Epoch 18/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.4258 - accuracy: 0.8550 - val_loss: 0.2740 - val_accuracy: 0.9093 - lr: 0.0010
Epoch 19/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.4239 - accuracy: 0.8546 - val_loss: 0.2763 - val_accuracy: 0.9066 - lr: 0.0010
Epoch 20/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.4212 - accuracy: 0.8560 - val_loss: 0.2981 - val_accuracy: 0.8987 - lr: 0.0010
Epoch 21/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.4202 - accuracy: 0.8559 - val_loss: 0.2866 - val_accuracy: 0.9073 - lr: 0.0010
Epoch 22/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.4198 - accuracy: 0.8574 - val_loss: 0.2768 - val_accuracy: 0.9073 - lr: 0.0010
Epoch 23/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.4180 - accuracy: 0.8577 - val_loss: 0.2895 - val_accuracy: 0.9003 - lr: 0.0010
Epoch 24/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.4158 - accuracy: 0.8582 - val_loss: 0.2722 - val_accuracy: 0.9097 - lr: 0.0010
Epoch 25/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.4146 - accuracy: 0.8586 - val_loss: 0.2788 - val_accuracy: 0.9050 - lr: 0.0010
Epoch 25: early stopping
import tensorflow
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
activation='relu',
kernel_initializer='he_normal',
input_shape=(28,28,1)))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(Dropout(0.4))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
model.add(Flatten())
model.add(Dense(10, activation='softmax', kernel_regularizer=tensorflow.keras.regularizers.L1L2(l1=0.01, l2=0.01)))
#loop through the optimizers
model.compile(optimizer='adam',
loss=tf.keras.losses.categorical_crossentropy,
metrics=['accuracy'])
model.summary()
early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
patience=5, min_lr=0.001)
h_callback = model.fit(X_train, y_train, epochs = 100,
validation_data=(X_val, y_val), callbacks=[early_stopping,reduce_lr])
# Plot train vs test loss during training
plot_loss(h_callback.history['loss'], h_callback.history['val_loss'])
plot_accuracy(h_callback.history['accuracy'], h_callback.history['val_accuracy'])
Model: "sequential_3"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_9 (Conv2D) (None, 26, 26, 32) 320
max_pooling2d_7 (MaxPooling (None, 13, 13, 32) 0
2D)
dropout_8 (Dropout) (None, 13, 13, 32) 0
conv2d_10 (Conv2D) (None, 11, 11, 64) 18496
max_pooling2d_8 (MaxPooling (None, 5, 5, 64) 0
2D)
dropout_9 (Dropout) (None, 5, 5, 64) 0
conv2d_11 (Conv2D) (None, 3, 3, 128) 73856
dropout_10 (Dropout) (None, 3, 3, 128) 0
flatten_5 (Flatten) (None, 1152) 0
dense_6 (Dense) (None, 128) 147584
dropout_11 (Dropout) (None, 128) 0
flatten_6 (Flatten) (None, 128) 0
dense_7 (Dense) (None, 10) 1290
=================================================================
Total params: 241,546
Trainable params: 241,546
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
4500/4500 [==============================] - 27s 6ms/step - loss: 0.9895 - accuracy: 0.6957 - val_loss: 0.5402 - val_accuracy: 0.8378 - lr: 0.0010
Epoch 2/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.6946 - accuracy: 0.7795 - val_loss: 0.4251 - val_accuracy: 0.8769 - lr: 0.0010
Epoch 3/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.6294 - accuracy: 0.8032 - val_loss: 0.4005 - val_accuracy: 0.8828 - lr: 0.0010
Epoch 4/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5950 - accuracy: 0.8154 - val_loss: 0.3977 - val_accuracy: 0.8806 - lr: 0.0010
Epoch 5/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5716 - accuracy: 0.8231 - val_loss: 0.3934 - val_accuracy: 0.8853 - lr: 0.0010
Epoch 6/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5568 - accuracy: 0.8288 - val_loss: 0.3701 - val_accuracy: 0.8933 - lr: 0.0010
Epoch 7/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5468 - accuracy: 0.8324 - val_loss: 0.3670 - val_accuracy: 0.8915 - lr: 0.0010
Epoch 8/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5375 - accuracy: 0.8336 - val_loss: 0.3508 - val_accuracy: 0.8970 - lr: 0.0010
Epoch 9/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5280 - accuracy: 0.8380 - val_loss: 0.3535 - val_accuracy: 0.8963 - lr: 0.0010
Epoch 10/100
4500/4500 [==============================] - 23s 5ms/step - loss: 0.5180 - accuracy: 0.8406 - val_loss: 0.3450 - val_accuracy: 0.8997 - lr: 0.0010
Epoch 11/100
4500/4500 [==============================] - 21s 5ms/step - loss: 0.5134 - accuracy: 0.8429 - val_loss: 0.3592 - val_accuracy: 0.8975 - lr: 0.0010
Epoch 12/100
4500/4500 [==============================] - 21s 5ms/step - loss: 0.5082 - accuracy: 0.8443 - val_loss: 0.3360 - val_accuracy: 0.9015 - lr: 0.0010
Epoch 13/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.5028 - accuracy: 0.8454 - val_loss: 0.3334 - val_accuracy: 0.9042 - lr: 0.0010
Epoch 14/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.5004 - accuracy: 0.8462 - val_loss: 0.3310 - val_accuracy: 0.9041 - lr: 0.0010
Epoch 15/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4967 - accuracy: 0.8481 - val_loss: 0.3575 - val_accuracy: 0.8903 - lr: 0.0010
Epoch 16/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.4921 - accuracy: 0.8494 - val_loss: 0.3187 - val_accuracy: 0.9082 - lr: 0.0010
Epoch 17/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4898 - accuracy: 0.8493 - val_loss: 0.3336 - val_accuracy: 0.9045 - lr: 0.0010
Epoch 18/100
4500/4500 [==============================] - 23s 5ms/step - loss: 0.4903 - accuracy: 0.8514 - val_loss: 0.3275 - val_accuracy: 0.9061 - lr: 0.0010
Epoch 19/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4846 - accuracy: 0.8527 - val_loss: 0.3391 - val_accuracy: 0.9016 - lr: 0.0010
Epoch 20/100
4500/4500 [==============================] - 22s 5ms/step - loss: 0.4819 - accuracy: 0.8520 - val_loss: 0.3220 - val_accuracy: 0.9072 - lr: 0.0010
Epoch 21/100
4500/4500 [==============================] - 24s 5ms/step - loss: 0.4786 - accuracy: 0.8526 - val_loss: 0.3303 - val_accuracy: 0.9086 - lr: 0.0010
Epoch 22/100
4500/4500 [==============================] - 27s 6ms/step - loss: 0.4763 - accuracy: 0.8545 - val_loss: 0.3295 - val_accuracy: 0.9038 - lr: 0.0010
Epoch 23/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4795 - accuracy: 0.8531 - val_loss: 0.3205 - val_accuracy: 0.9088 - lr: 0.0010
Epoch 24/100
4500/4500 [==============================] - 27s 6ms/step - loss: 0.4766 - accuracy: 0.8543 - val_loss: 0.3290 - val_accuracy: 0.9040 - lr: 0.0010
Epoch 25/100
4500/4500 [==============================] - 27s 6ms/step - loss: 0.4752 - accuracy: 0.8540 - val_loss: 0.3125 - val_accuracy: 0.9129 - lr: 0.0010
Epoch 26/100
4500/4500 [==============================] - 27s 6ms/step - loss: 0.4719 - accuracy: 0.8558 - val_loss: 0.3275 - val_accuracy: 0.9057 - lr: 0.0010
Epoch 27/100
4500/4500 [==============================] - 27s 6ms/step - loss: 0.4710 - accuracy: 0.8558 - val_loss: 0.3384 - val_accuracy: 0.9033 - lr: 0.0010
Epoch 28/100
4500/4500 [==============================] - 27s 6ms/step - loss: 0.4697 - accuracy: 0.8561 - val_loss: 0.3251 - val_accuracy: 0.9059 - lr: 0.0010
Epoch 29/100
4500/4500 [==============================] - 27s 6ms/step - loss: 0.4669 - accuracy: 0.8582 - val_loss: 0.3167 - val_accuracy: 0.9097 - lr: 0.0010
Epoch 30/100
4500/4500 [==============================] - 27s 6ms/step - loss: 0.4700 - accuracy: 0.8570 - val_loss: 0.3096 - val_accuracy: 0.9114 - lr: 0.0010
Epoch 31/100
4500/4500 [==============================] - 23s 5ms/step - loss: 0.4670 - accuracy: 0.8568 - val_loss: 0.3204 - val_accuracy: 0.9083 - lr: 0.0010
Epoch 32/100
4500/4500 [==============================] - 24s 5ms/step - loss: 0.4632 - accuracy: 0.8584 - val_loss: 0.3319 - val_accuracy: 0.9028 - lr: 0.0010
Epoch 33/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4629 - accuracy: 0.8586 - val_loss: 0.3299 - val_accuracy: 0.9082 - lr: 0.0010
Epoch 34/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4630 - accuracy: 0.8593 - val_loss: 0.3170 - val_accuracy: 0.9112 - lr: 0.0010
Epoch 35/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4619 - accuracy: 0.8584 - val_loss: 0.3063 - val_accuracy: 0.9094 - lr: 0.0010
Epoch 36/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4618 - accuracy: 0.8595 - val_loss: 0.3404 - val_accuracy: 0.9038 - lr: 0.0010
Epoch 37/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4609 - accuracy: 0.8592 - val_loss: 0.3058 - val_accuracy: 0.9141 - lr: 0.0010
Epoch 38/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4606 - accuracy: 0.8593 - val_loss: 0.3060 - val_accuracy: 0.9105 - lr: 0.0010
Epoch 39/100
4500/4500 [==============================] - 27s 6ms/step - loss: 0.4609 - accuracy: 0.8598 - val_loss: 0.3043 - val_accuracy: 0.9133 - lr: 0.0010
Epoch 40/100
4500/4500 [==============================] - 27s 6ms/step - loss: 0.4583 - accuracy: 0.8605 - val_loss: 0.3068 - val_accuracy: 0.9137 - lr: 0.0010
Epoch 41/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4564 - accuracy: 0.8611 - val_loss: 0.3053 - val_accuracy: 0.9128 - lr: 0.0010
Epoch 42/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4544 - accuracy: 0.8614 - val_loss: 0.3259 - val_accuracy: 0.9060 - lr: 0.0010
Epoch 43/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4554 - accuracy: 0.8615 - val_loss: 0.3222 - val_accuracy: 0.9038 - lr: 0.0010
Epoch 44/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4552 - accuracy: 0.8618 - val_loss: 0.3057 - val_accuracy: 0.9133 - lr: 0.0010
Epoch 45/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4547 - accuracy: 0.8611 - val_loss: 0.3088 - val_accuracy: 0.9122 - lr: 0.0010
Epoch 46/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4539 - accuracy: 0.8626 - val_loss: 0.3077 - val_accuracy: 0.9111 - lr: 0.0010
Epoch 47/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4536 - accuracy: 0.8621 - val_loss: 0.3463 - val_accuracy: 0.8936 - lr: 0.0010
Epoch 48/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4566 - accuracy: 0.8618 - val_loss: 0.2999 - val_accuracy: 0.9147 - lr: 0.0010
Epoch 49/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4517 - accuracy: 0.8626 - val_loss: 0.3169 - val_accuracy: 0.9098 - lr: 0.0010
Epoch 50/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4509 - accuracy: 0.8640 - val_loss: 0.3211 - val_accuracy: 0.9105 - lr: 0.0010
Epoch 51/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4513 - accuracy: 0.8622 - val_loss: 0.3165 - val_accuracy: 0.9133 - lr: 0.0010
Epoch 52/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4500 - accuracy: 0.8624 - val_loss: 0.3152 - val_accuracy: 0.9143 - lr: 0.0010
Epoch 53/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4486 - accuracy: 0.8643 - val_loss: 0.2967 - val_accuracy: 0.9156 - lr: 0.0010
Epoch 54/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4496 - accuracy: 0.8634 - val_loss: 0.3325 - val_accuracy: 0.9012 - lr: 0.0010
Epoch 55/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4499 - accuracy: 0.8639 - val_loss: 0.3083 - val_accuracy: 0.9157 - lr: 0.0010
Epoch 56/100
4500/4500 [==============================] - 27s 6ms/step - loss: 0.4481 - accuracy: 0.8648 - val_loss: 0.3067 - val_accuracy: 0.9165 - lr: 0.0010
Epoch 57/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4508 - accuracy: 0.8634 - val_loss: 0.3177 - val_accuracy: 0.9063 - lr: 0.0010
Epoch 58/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4484 - accuracy: 0.8633 - val_loss: 0.3162 - val_accuracy: 0.9066 - lr: 0.0010
Epoch 59/100
4500/4500 [==============================] - 27s 6ms/step - loss: 0.4494 - accuracy: 0.8635 - val_loss: 0.2949 - val_accuracy: 0.9149 - lr: 0.0010
Epoch 60/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4467 - accuracy: 0.8645 - val_loss: 0.3015 - val_accuracy: 0.9148 - lr: 0.0010
Epoch 61/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4476 - accuracy: 0.8636 - val_loss: 0.3047 - val_accuracy: 0.9124 - lr: 0.0010
Epoch 62/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4443 - accuracy: 0.8650 - val_loss: 0.3170 - val_accuracy: 0.9123 - lr: 0.0010
Epoch 63/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4471 - accuracy: 0.8647 - val_loss: 0.3317 - val_accuracy: 0.9099 - lr: 0.0010
Epoch 64/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4454 - accuracy: 0.8657 - val_loss: 0.3035 - val_accuracy: 0.9168 - lr: 0.0010
Epoch 65/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4424 - accuracy: 0.8655 - val_loss: 0.2974 - val_accuracy: 0.9152 - lr: 0.0010
Epoch 66/100
4500/4500 [==============================] - 26s 6ms/step - loss: 0.4444 - accuracy: 0.8648 - val_loss: 0.3237 - val_accuracy: 0.9112 - lr: 0.0010
Epoch 67/100
4500/4500 [==============================] - 24s 5ms/step - loss: 0.4433 - accuracy: 0.8647 - val_loss: 0.3310 - val_accuracy: 0.9109 - lr: 0.0010
Epoch 68/100
4500/4500 [==============================] - 25s 6ms/step - loss: 0.4430 - accuracy: 0.8645 - val_loss: 0.3343 - val_accuracy: 0.9005 - lr: 0.0010
Epoch 69/100
4500/4500 [==============================] - 27s 6ms/step - loss: 0.4446 - accuracy: 0.8657 - val_loss: 0.2968 - val_accuracy: 0.9186 - lr: 0.0010
Epoch 69: early stopping

from numpy import mean
from numpy import std
from matplotlib import pyplot
from sklearn.model_selection import KFold
from keras.datasets import fashion_mnist
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.optimizers import SGD
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(28, 28, 1)))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(100, activation='relu', kernel_initializer='he_uniform'))
model.add(Dense(10, activation='softmax'))
# compile model
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
# fit model
h_callback = model.fit(X_train, y_train, epochs = 10,
validation_data=(X_val, y_val))
# Plot train vs test loss during training
plot_loss(h_callback.history['loss'], h_callback.history['val_loss'])
# Plot train vs test accuracy during training
plot_accuracy(h_callback.history['accuracy'], h_callback.history['val_accuracy'])
precision,recall,f1 = get_metrics(model,X_test)
Model_scores = pd.concat([Model_scores,pd.DataFrame([['Machine Learning Mastery model',test_acc,test_loss,precision,recall,f1]],columns=Model_scores.columns)],ignore_index=True)
Epoch 1/10 4500/4500 [==============================] - 17s 4ms/step - loss: 0.6228 - accuracy: 0.7748 - val_loss: 0.3360 - val_accuracy: 0.8807 Epoch 2/10 4500/4500 [==============================] - 16s 3ms/step - loss: 0.4367 - accuracy: 0.8417 - val_loss: 0.3283 - val_accuracy: 0.8831 Epoch 3/10 4500/4500 [==============================] - 16s 3ms/step - loss: 0.3760 - accuracy: 0.8620 - val_loss: 0.2817 - val_accuracy: 0.9001 Epoch 4/10 4500/4500 [==============================] - 16s 3ms/step - loss: 0.3347 - accuracy: 0.8766 - val_loss: 0.2832 - val_accuracy: 0.9004 Epoch 5/10 4500/4500 [==============================] - 16s 3ms/step - loss: 0.3030 - accuracy: 0.8880 - val_loss: 0.2947 - val_accuracy: 0.8954 Epoch 6/10 4500/4500 [==============================] - 16s 3ms/step - loss: 0.2726 - accuracy: 0.8988 - val_loss: 0.2887 - val_accuracy: 0.9010 Epoch 7/10 4500/4500 [==============================] - 16s 3ms/step - loss: 0.2481 - accuracy: 0.9076 - val_loss: 0.2834 - val_accuracy: 0.9053 Epoch 8/10 4500/4500 [==============================] - 16s 3ms/step - loss: 0.2267 - accuracy: 0.9164 - val_loss: 0.2864 - val_accuracy: 0.9041 Epoch 9/10 4500/4500 [==============================] - 16s 4ms/step - loss: 0.2071 - accuracy: 0.9227 - val_loss: 0.2926 - val_accuracy: 0.9039 Epoch 10/10 4500/4500 [==============================] - 16s 4ms/step - loss: 0.1868 - accuracy: 0.9304 - val_loss: 0.3146 - val_accuracy: 0.8978
313/313 [==============================] - 0s 1ms/step
pred = model.predict(X_test)
pred = np.argmax(pred, axis=1)
classifation_matrix = confusion_matrix(y_test_label, pred)
# plot confusion matrix
plt.figure(figsize=(10,10))
sns.heatmap(classifation_matrix, annot=True, fmt="d")
plt.title("Confusion matrix")
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
gc.collect()
tf.keras.backend.clear_session()
del model
313/313 [==============================] - 0s 1ms/step
from keras.callbacks import ModelCheckpoint
from keras.models import load_model
final_model = Sequential()
final_model.add(Conv2D(32, kernel_size=(3, 3),activation='linear',kernel_initializer='he_normal',input_shape=(28,28,1)))
final_model.add(LeakyReLU(alpha=0.1))
final_model.add(MaxPooling2D((2, 2)))
final_model.add(BatchNormalization())
final_model.add(Dropout(0.25))
final_model.add(Conv2D(64, (5, 5), activation='linear'))
final_model.add(LeakyReLU(alpha=0.1))
final_model.add(BatchNormalization())
final_model.add(MaxPooling2D(pool_size=(3, 3)))
final_model.add(Dropout(0.25))
final_model.add(Conv2D(128, (3, 3), activation='linear'))
final_model.add(LeakyReLU(alpha=0.1))
final_model.add(BatchNormalization())
final_model.add(Dropout(0.4))
final_model.add(Flatten())
final_model.add(Dense(128, activation='linear'))
final_model.add(LeakyReLU(alpha=0.1))
final_model.add(Dropout(0.3))
final_model.add(Flatten())
final_model.add(Dense(10, activation='softmax', kernel_regularizer=tensorflow.keras.regularizers.L1(0.01)))
final_model.compile(optimizer='adam',
loss=tf.keras.losses.categorical_crossentropy,
metrics=['accuracy'])
final_model.summary()
early_stopping = EarlyStopping(monitor='val_loss', patience=20, verbose=1)
mc = ModelCheckpoint('best_model.h5', monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
patience=5, min_lr=0.001)
h_callback = final_model.fit(X_train, y_train, epochs = 200,
validation_data=(X_val, y_val), callbacks=[early_stopping,mc,reduce_lr] , batch_size=64)
# Plot train vs test loss during training
plot_loss(h_callback.history['loss'], h_callback.history['val_loss'])
plot_accuracy(h_callback.history['accuracy'], h_callback.history['val_accuracy'])
saved_model = load_model('best_model.h5')
# evaluate the model
_, train_acc = saved_model.evaluate(X_train, y_train, verbose=0)
_, test_acc = saved_model.evaluate(X_test, y_test, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))
precision,recall,f1 = get_metrics(saved_model,X_test)
Model_scores = pd.concat([Model_scores,pd.DataFrame([['Fan in final model',test_acc,test_loss,precision,recall,f1]],columns=Model_scores.columns)],ignore_index=True)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 26, 26, 32) 320
leaky_re_lu (LeakyReLU) (None, 26, 26, 32) 0
max_pooling2d (MaxPooling2D (None, 13, 13, 32) 0
)
batch_normalization (BatchN (None, 13, 13, 32) 128
ormalization)
dropout (Dropout) (None, 13, 13, 32) 0
conv2d_1 (Conv2D) (None, 9, 9, 64) 51264
leaky_re_lu_1 (LeakyReLU) (None, 9, 9, 64) 0
batch_normalization_1 (Batc (None, 9, 9, 64) 256
hNormalization)
max_pooling2d_1 (MaxPooling (None, 3, 3, 64) 0
2D)
dropout_1 (Dropout) (None, 3, 3, 64) 0
conv2d_2 (Conv2D) (None, 1, 1, 128) 73856
leaky_re_lu_2 (LeakyReLU) (None, 1, 1, 128) 0
batch_normalization_2 (Batc (None, 1, 1, 128) 512
hNormalization)
dropout_2 (Dropout) (None, 1, 1, 128) 0
flatten (Flatten) (None, 128) 0
dense (Dense) (None, 128) 16512
leaky_re_lu_3 (LeakyReLU) (None, 128) 0
dropout_3 (Dropout) (None, 128) 0
flatten_1 (Flatten) (None, 128) 0
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 144,138
Trainable params: 143,690
Non-trainable params: 448
_________________________________________________________________
Epoch 1/200
2249/2250 [============================>.] - ETA: 0s - loss: 1.0818 - accuracy: 0.7051
Epoch 1: val_accuracy improved from -inf to 0.85308, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 1.0816 - accuracy: 0.7051 - val_loss: 0.5059 - val_accuracy: 0.8531 - lr: 0.0010
Epoch 2/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.7070 - accuracy: 0.7840
Epoch 2: val_accuracy improved from 0.85308 to 0.87692, saving model to best_model.h5
2250/2250 [==============================] - 14s 6ms/step - loss: 0.7068 - accuracy: 0.7841 - val_loss: 0.4260 - val_accuracy: 0.8769 - lr: 0.0010
Epoch 3/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.6363 - accuracy: 0.8046
Epoch 3: val_accuracy improved from 0.87692 to 0.88558, saving model to best_model.h5
2250/2250 [==============================] - 14s 6ms/step - loss: 0.6362 - accuracy: 0.8046 - val_loss: 0.3978 - val_accuracy: 0.8856 - lr: 0.0010
Epoch 4/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.6039 - accuracy: 0.8137
Epoch 4: val_accuracy improved from 0.88558 to 0.88642, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.6037 - accuracy: 0.8138 - val_loss: 0.3917 - val_accuracy: 0.8864 - lr: 0.0010
Epoch 5/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.5788 - accuracy: 0.8219
Epoch 5: val_accuracy improved from 0.88642 to 0.89617, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.5788 - accuracy: 0.8218 - val_loss: 0.3611 - val_accuracy: 0.8962 - lr: 0.0010
Epoch 6/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.5637 - accuracy: 0.8272
Epoch 6: val_accuracy improved from 0.89617 to 0.90050, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.5636 - accuracy: 0.8272 - val_loss: 0.3641 - val_accuracy: 0.9005 - lr: 0.0010
Epoch 7/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.5512 - accuracy: 0.8319
Epoch 7: val_accuracy did not improve from 0.90050
2250/2250 [==============================] - 16s 7ms/step - loss: 0.5512 - accuracy: 0.8319 - val_loss: 0.3585 - val_accuracy: 0.8954 - lr: 0.0010
Epoch 8/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.5411 - accuracy: 0.8348
Epoch 8: val_accuracy improved from 0.90050 to 0.90233, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.5412 - accuracy: 0.8347 - val_loss: 0.3409 - val_accuracy: 0.9023 - lr: 0.0010
Epoch 9/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.5313 - accuracy: 0.8372
Epoch 9: val_accuracy improved from 0.90233 to 0.90250, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.5311 - accuracy: 0.8373 - val_loss: 0.3362 - val_accuracy: 0.9025 - lr: 0.0010
Epoch 10/200
2250/2250 [==============================] - ETA: 0s - loss: 0.5261 - accuracy: 0.8396
Epoch 10: val_accuracy improved from 0.90250 to 0.90683, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.5261 - accuracy: 0.8396 - val_loss: 0.3322 - val_accuracy: 0.9068 - lr: 0.0010
Epoch 11/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.5187 - accuracy: 0.8410
Epoch 11: val_accuracy did not improve from 0.90683
2250/2250 [==============================] - 16s 7ms/step - loss: 0.5188 - accuracy: 0.8410 - val_loss: 0.3370 - val_accuracy: 0.9054 - lr: 0.0010
Epoch 12/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.5139 - accuracy: 0.8430
Epoch 12: val_accuracy did not improve from 0.90683
2250/2250 [==============================] - 16s 7ms/step - loss: 0.5139 - accuracy: 0.8429 - val_loss: 0.3436 - val_accuracy: 0.8988 - lr: 0.0010
Epoch 13/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.5106 - accuracy: 0.8445
Epoch 13: val_accuracy did not improve from 0.90683
2250/2250 [==============================] - 16s 7ms/step - loss: 0.5106 - accuracy: 0.8445 - val_loss: 0.3462 - val_accuracy: 0.8991 - lr: 0.0010
Epoch 14/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.5032 - accuracy: 0.8469
Epoch 14: val_accuracy did not improve from 0.90683
2250/2250 [==============================] - 16s 7ms/step - loss: 0.5032 - accuracy: 0.8469 - val_loss: 0.3467 - val_accuracy: 0.8919 - lr: 0.0010
Epoch 15/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4995 - accuracy: 0.8482
Epoch 15: val_accuracy improved from 0.90683 to 0.91133, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4994 - accuracy: 0.8482 - val_loss: 0.3137 - val_accuracy: 0.9113 - lr: 0.0010
Epoch 16/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4974 - accuracy: 0.8488
Epoch 16: val_accuracy improved from 0.91133 to 0.91175, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4975 - accuracy: 0.8488 - val_loss: 0.3142 - val_accuracy: 0.9118 - lr: 0.0010
Epoch 17/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4936 - accuracy: 0.8488
Epoch 17: val_accuracy did not improve from 0.91175
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4936 - accuracy: 0.8488 - val_loss: 0.3197 - val_accuracy: 0.9098 - lr: 0.0010
Epoch 18/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4937 - accuracy: 0.8511
Epoch 18: val_accuracy did not improve from 0.91175
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4938 - accuracy: 0.8511 - val_loss: 0.3320 - val_accuracy: 0.9057 - lr: 0.0010
Epoch 19/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4920 - accuracy: 0.8506
Epoch 19: val_accuracy did not improve from 0.91175
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4919 - accuracy: 0.8507 - val_loss: 0.3580 - val_accuracy: 0.8920 - lr: 0.0010
Epoch 20/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4907 - accuracy: 0.8511
Epoch 20: val_accuracy did not improve from 0.91175
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4907 - accuracy: 0.8511 - val_loss: 0.3290 - val_accuracy: 0.9021 - lr: 0.0010
Epoch 21/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4853 - accuracy: 0.8528
Epoch 21: val_accuracy did not improve from 0.91175
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4853 - accuracy: 0.8528 - val_loss: 0.3237 - val_accuracy: 0.9060 - lr: 0.0010
Epoch 22/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4834 - accuracy: 0.8534
Epoch 22: val_accuracy did not improve from 0.91175
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4834 - accuracy: 0.8534 - val_loss: 0.3163 - val_accuracy: 0.9111 - lr: 0.0010
Epoch 23/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4818 - accuracy: 0.8528
Epoch 23: val_accuracy improved from 0.91175 to 0.91267, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4817 - accuracy: 0.8528 - val_loss: 0.3091 - val_accuracy: 0.9127 - lr: 0.0010
Epoch 24/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4814 - accuracy: 0.8536
Epoch 24: val_accuracy improved from 0.91267 to 0.91308, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4812 - accuracy: 0.8537 - val_loss: 0.3108 - val_accuracy: 0.9131 - lr: 0.0010
Epoch 25/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4778 - accuracy: 0.8546
Epoch 25: val_accuracy did not improve from 0.91308
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4778 - accuracy: 0.8546 - val_loss: 0.3106 - val_accuracy: 0.9131 - lr: 0.0010
Epoch 26/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4778 - accuracy: 0.8559
Epoch 26: val_accuracy did not improve from 0.91308
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4778 - accuracy: 0.8559 - val_loss: 0.3123 - val_accuracy: 0.9118 - lr: 0.0010
Epoch 27/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4758 - accuracy: 0.8561
Epoch 27: val_accuracy improved from 0.91308 to 0.91442, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4757 - accuracy: 0.8561 - val_loss: 0.3180 - val_accuracy: 0.9144 - lr: 0.0010
Epoch 28/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4728 - accuracy: 0.8567
Epoch 28: val_accuracy did not improve from 0.91442
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4727 - accuracy: 0.8567 - val_loss: 0.3173 - val_accuracy: 0.9091 - lr: 0.0010
Epoch 29/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4724 - accuracy: 0.8577
Epoch 29: val_accuracy did not improve from 0.91442
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4724 - accuracy: 0.8577 - val_loss: 0.3140 - val_accuracy: 0.9092 - lr: 0.0010
Epoch 30/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4697 - accuracy: 0.8575
Epoch 30: val_accuracy improved from 0.91442 to 0.91475, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4698 - accuracy: 0.8575 - val_loss: 0.3073 - val_accuracy: 0.9147 - lr: 0.0010
Epoch 31/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4687 - accuracy: 0.8588
Epoch 31: val_accuracy did not improve from 0.91475
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4688 - accuracy: 0.8588 - val_loss: 0.2999 - val_accuracy: 0.9138 - lr: 0.0010
Epoch 32/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4688 - accuracy: 0.8586
Epoch 32: val_accuracy did not improve from 0.91475
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4688 - accuracy: 0.8586 - val_loss: 0.3021 - val_accuracy: 0.9135 - lr: 0.0010
Epoch 33/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4660 - accuracy: 0.8586
Epoch 33: val_accuracy did not improve from 0.91475
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4660 - accuracy: 0.8586 - val_loss: 0.3058 - val_accuracy: 0.9140 - lr: 0.0010
Epoch 34/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4651 - accuracy: 0.8596
Epoch 34: val_accuracy did not improve from 0.91475
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4651 - accuracy: 0.8596 - val_loss: 0.3047 - val_accuracy: 0.9133 - lr: 0.0010
Epoch 35/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4647 - accuracy: 0.8602
Epoch 35: val_accuracy did not improve from 0.91475
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4648 - accuracy: 0.8602 - val_loss: 0.3200 - val_accuracy: 0.9112 - lr: 0.0010
Epoch 36/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4633 - accuracy: 0.8599
Epoch 36: val_accuracy did not improve from 0.91475
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4635 - accuracy: 0.8598 - val_loss: 0.3075 - val_accuracy: 0.9138 - lr: 0.0010
Epoch 37/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4622 - accuracy: 0.8604
Epoch 37: val_accuracy did not improve from 0.91475
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4624 - accuracy: 0.8603 - val_loss: 0.3119 - val_accuracy: 0.9101 - lr: 0.0010
Epoch 38/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4601 - accuracy: 0.8616
Epoch 38: val_accuracy did not improve from 0.91475
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4602 - accuracy: 0.8616 - val_loss: 0.3155 - val_accuracy: 0.9076 - lr: 0.0010
Epoch 39/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4625 - accuracy: 0.8601
Epoch 39: val_accuracy improved from 0.91475 to 0.91667, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4626 - accuracy: 0.8601 - val_loss: 0.2951 - val_accuracy: 0.9167 - lr: 0.0010
Epoch 40/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4587 - accuracy: 0.8607
Epoch 40: val_accuracy did not improve from 0.91667
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4587 - accuracy: 0.8607 - val_loss: 0.3096 - val_accuracy: 0.9147 - lr: 0.0010
Epoch 41/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4580 - accuracy: 0.8611
Epoch 41: val_accuracy improved from 0.91667 to 0.91700, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4580 - accuracy: 0.8612 - val_loss: 0.2949 - val_accuracy: 0.9170 - lr: 0.0010
Epoch 42/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4584 - accuracy: 0.8618
Epoch 42: val_accuracy did not improve from 0.91700
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4584 - accuracy: 0.8618 - val_loss: 0.3084 - val_accuracy: 0.9117 - lr: 0.0010
Epoch 43/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4559 - accuracy: 0.8627
Epoch 43: val_accuracy did not improve from 0.91700
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4560 - accuracy: 0.8627 - val_loss: 0.2960 - val_accuracy: 0.9147 - lr: 0.0010
Epoch 44/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4608 - accuracy: 0.8616
Epoch 44: val_accuracy did not improve from 0.91700
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4609 - accuracy: 0.8616 - val_loss: 0.3066 - val_accuracy: 0.9150 - lr: 0.0010
Epoch 45/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4565 - accuracy: 0.8622
Epoch 45: val_accuracy did not improve from 0.91700
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4565 - accuracy: 0.8621 - val_loss: 0.3093 - val_accuracy: 0.9128 - lr: 0.0010
Epoch 46/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4525 - accuracy: 0.8635
Epoch 46: val_accuracy did not improve from 0.91700
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4525 - accuracy: 0.8635 - val_loss: 0.3028 - val_accuracy: 0.9158 - lr: 0.0010
Epoch 47/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4542 - accuracy: 0.8634
Epoch 47: val_accuracy did not improve from 0.91700
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4542 - accuracy: 0.8634 - val_loss: 0.3019 - val_accuracy: 0.9158 - lr: 0.0010
Epoch 48/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4545 - accuracy: 0.8634
Epoch 48: val_accuracy improved from 0.91700 to 0.91767, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4544 - accuracy: 0.8634 - val_loss: 0.2928 - val_accuracy: 0.9177 - lr: 0.0010
Epoch 49/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4548 - accuracy: 0.8635
Epoch 49: val_accuracy did not improve from 0.91767
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4548 - accuracy: 0.8636 - val_loss: 0.3017 - val_accuracy: 0.9150 - lr: 0.0010
Epoch 50/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4527 - accuracy: 0.8637
Epoch 50: val_accuracy did not improve from 0.91767
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4530 - accuracy: 0.8637 - val_loss: 0.3134 - val_accuracy: 0.9105 - lr: 0.0010
Epoch 51/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4496 - accuracy: 0.8650
Epoch 51: val_accuracy did not improve from 0.91767
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4495 - accuracy: 0.8650 - val_loss: 0.3004 - val_accuracy: 0.9159 - lr: 0.0010
Epoch 52/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4511 - accuracy: 0.8647
Epoch 52: val_accuracy did not improve from 0.91767
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4510 - accuracy: 0.8647 - val_loss: 0.3061 - val_accuracy: 0.9161 - lr: 0.0010
Epoch 53/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4486 - accuracy: 0.8647
Epoch 53: val_accuracy did not improve from 0.91767
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4486 - accuracy: 0.8647 - val_loss: 0.2922 - val_accuracy: 0.9176 - lr: 0.0010
Epoch 54/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4489 - accuracy: 0.8659
Epoch 54: val_accuracy did not improve from 0.91767
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4489 - accuracy: 0.8659 - val_loss: 0.2996 - val_accuracy: 0.9153 - lr: 0.0010
Epoch 55/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4475 - accuracy: 0.8647
Epoch 55: val_accuracy did not improve from 0.91767
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4476 - accuracy: 0.8647 - val_loss: 0.2932 - val_accuracy: 0.9150 - lr: 0.0010
Epoch 56/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4487 - accuracy: 0.8661
Epoch 56: val_accuracy did not improve from 0.91767
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4486 - accuracy: 0.8662 - val_loss: 0.3077 - val_accuracy: 0.9129 - lr: 0.0010
Epoch 57/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4462 - accuracy: 0.8662
Epoch 57: val_accuracy did not improve from 0.91767
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4465 - accuracy: 0.8660 - val_loss: 0.2949 - val_accuracy: 0.9163 - lr: 0.0010
Epoch 58/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4472 - accuracy: 0.8654
Epoch 58: val_accuracy did not improve from 0.91767
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4473 - accuracy: 0.8654 - val_loss: 0.3121 - val_accuracy: 0.9128 - lr: 0.0010
Epoch 59/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4489 - accuracy: 0.8654
Epoch 59: val_accuracy improved from 0.91767 to 0.91800, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4489 - accuracy: 0.8654 - val_loss: 0.3061 - val_accuracy: 0.9180 - lr: 0.0010
Epoch 60/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4471 - accuracy: 0.8657
Epoch 60: val_accuracy did not improve from 0.91800
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4471 - accuracy: 0.8657 - val_loss: 0.3028 - val_accuracy: 0.9112 - lr: 0.0010
Epoch 61/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4452 - accuracy: 0.8656
Epoch 61: val_accuracy did not improve from 0.91800
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4452 - accuracy: 0.8656 - val_loss: 0.3036 - val_accuracy: 0.9148 - lr: 0.0010
Epoch 62/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4477 - accuracy: 0.8668
Epoch 62: val_accuracy did not improve from 0.91800
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4478 - accuracy: 0.8668 - val_loss: 0.3035 - val_accuracy: 0.9174 - lr: 0.0010
Epoch 63/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4464 - accuracy: 0.8655
Epoch 63: val_accuracy did not improve from 0.91800
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4463 - accuracy: 0.8656 - val_loss: 0.2960 - val_accuracy: 0.9144 - lr: 0.0010
Epoch 64/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4441 - accuracy: 0.8670
Epoch 64: val_accuracy did not improve from 0.91800
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4441 - accuracy: 0.8670 - val_loss: 0.2985 - val_accuracy: 0.9150 - lr: 0.0010
Epoch 65/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4426 - accuracy: 0.8671
Epoch 65: val_accuracy did not improve from 0.91800
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4424 - accuracy: 0.8671 - val_loss: 0.3052 - val_accuracy: 0.9175 - lr: 0.0010
Epoch 66/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4427 - accuracy: 0.8663
Epoch 66: val_accuracy did not improve from 0.91800
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4428 - accuracy: 0.8663 - val_loss: 0.3003 - val_accuracy: 0.9161 - lr: 0.0010
Epoch 67/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4429 - accuracy: 0.8669
Epoch 67: val_accuracy did not improve from 0.91800
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4428 - accuracy: 0.8670 - val_loss: 0.2916 - val_accuracy: 0.9158 - lr: 0.0010
Epoch 68/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4418 - accuracy: 0.8673
Epoch 68: val_accuracy did not improve from 0.91800
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4421 - accuracy: 0.8672 - val_loss: 0.3134 - val_accuracy: 0.9128 - lr: 0.0010
Epoch 69/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4440 - accuracy: 0.8679
Epoch 69: val_accuracy did not improve from 0.91800
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4440 - accuracy: 0.8679 - val_loss: 0.3008 - val_accuracy: 0.9147 - lr: 0.0010
Epoch 70/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4403 - accuracy: 0.8679
Epoch 70: val_accuracy improved from 0.91800 to 0.91975, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4403 - accuracy: 0.8679 - val_loss: 0.2960 - val_accuracy: 0.9197 - lr: 0.0010
Epoch 71/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4406 - accuracy: 0.8673
Epoch 71: val_accuracy did not improve from 0.91975
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4407 - accuracy: 0.8673 - val_loss: 0.2969 - val_accuracy: 0.9128 - lr: 0.0010
Epoch 72/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4381 - accuracy: 0.8681
Epoch 72: val_accuracy did not improve from 0.91975
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4381 - accuracy: 0.8681 - val_loss: 0.2952 - val_accuracy: 0.9158 - lr: 0.0010
Epoch 73/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4404 - accuracy: 0.8680
Epoch 73: val_accuracy did not improve from 0.91975
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4404 - accuracy: 0.8680 - val_loss: 0.3058 - val_accuracy: 0.9143 - lr: 0.0010
Epoch 74/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4345 - accuracy: 0.8691
Epoch 74: val_accuracy improved from 0.91975 to 0.92142, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4345 - accuracy: 0.8691 - val_loss: 0.2878 - val_accuracy: 0.9214 - lr: 0.0010
Epoch 75/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4401 - accuracy: 0.8679
Epoch 75: val_accuracy did not improve from 0.92142
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4401 - accuracy: 0.8679 - val_loss: 0.2867 - val_accuracy: 0.9194 - lr: 0.0010
Epoch 76/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4377 - accuracy: 0.8692
Epoch 76: val_accuracy did not improve from 0.92142
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4377 - accuracy: 0.8692 - val_loss: 0.2974 - val_accuracy: 0.9138 - lr: 0.0010
Epoch 77/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4382 - accuracy: 0.8685
Epoch 77: val_accuracy did not improve from 0.92142
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4382 - accuracy: 0.8685 - val_loss: 0.3005 - val_accuracy: 0.9190 - lr: 0.0010
Epoch 78/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4374 - accuracy: 0.8693
Epoch 78: val_accuracy did not improve from 0.92142
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4373 - accuracy: 0.8693 - val_loss: 0.3048 - val_accuracy: 0.9160 - lr: 0.0010
Epoch 79/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4400 - accuracy: 0.8675
Epoch 79: val_accuracy improved from 0.92142 to 0.92158, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4399 - accuracy: 0.8675 - val_loss: 0.2908 - val_accuracy: 0.9216 - lr: 0.0010
Epoch 80/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4359 - accuracy: 0.8696
Epoch 80: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4358 - accuracy: 0.8696 - val_loss: 0.2924 - val_accuracy: 0.9169 - lr: 0.0010
Epoch 81/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4351 - accuracy: 0.8695
Epoch 81: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4351 - accuracy: 0.8695 - val_loss: 0.3155 - val_accuracy: 0.9139 - lr: 0.0010
Epoch 82/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4397 - accuracy: 0.8683
Epoch 82: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4397 - accuracy: 0.8683 - val_loss: 0.3130 - val_accuracy: 0.9107 - lr: 0.0010
Epoch 83/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4369 - accuracy: 0.8696
Epoch 83: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4369 - accuracy: 0.8696 - val_loss: 0.2909 - val_accuracy: 0.9181 - lr: 0.0010
Epoch 84/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4346 - accuracy: 0.8695
Epoch 84: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4347 - accuracy: 0.8695 - val_loss: 0.2993 - val_accuracy: 0.9191 - lr: 0.0010
Epoch 85/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4349 - accuracy: 0.8701
Epoch 85: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4349 - accuracy: 0.8701 - val_loss: 0.2977 - val_accuracy: 0.9189 - lr: 0.0010
Epoch 86/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4370 - accuracy: 0.8682
Epoch 86: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4370 - accuracy: 0.8682 - val_loss: 0.2917 - val_accuracy: 0.9180 - lr: 0.0010
Epoch 87/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4374 - accuracy: 0.8691
Epoch 87: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4374 - accuracy: 0.8691 - val_loss: 0.2861 - val_accuracy: 0.9199 - lr: 0.0010
Epoch 88/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4344 - accuracy: 0.8703
Epoch 88: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4344 - accuracy: 0.8703 - val_loss: 0.3083 - val_accuracy: 0.9152 - lr: 0.0010
Epoch 89/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4375 - accuracy: 0.8693
Epoch 89: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4376 - accuracy: 0.8693 - val_loss: 0.2918 - val_accuracy: 0.9182 - lr: 0.0010
Epoch 90/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4348 - accuracy: 0.8695
Epoch 90: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4347 - accuracy: 0.8695 - val_loss: 0.2940 - val_accuracy: 0.9203 - lr: 0.0010
Epoch 91/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4356 - accuracy: 0.8693
Epoch 91: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4355 - accuracy: 0.8694 - val_loss: 0.2854 - val_accuracy: 0.9204 - lr: 0.0010
Epoch 92/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4333 - accuracy: 0.8700
Epoch 92: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4334 - accuracy: 0.8700 - val_loss: 0.2832 - val_accuracy: 0.9195 - lr: 0.0010
Epoch 93/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4328 - accuracy: 0.8704
Epoch 93: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4329 - accuracy: 0.8703 - val_loss: 0.3042 - val_accuracy: 0.9122 - lr: 0.0010
Epoch 94/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4355 - accuracy: 0.8703
Epoch 94: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4354 - accuracy: 0.8703 - val_loss: 0.2955 - val_accuracy: 0.9172 - lr: 0.0010
Epoch 95/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4346 - accuracy: 0.8702
Epoch 95: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4346 - accuracy: 0.8702 - val_loss: 0.2950 - val_accuracy: 0.9170 - lr: 0.0010
Epoch 96/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4323 - accuracy: 0.8708
Epoch 96: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4323 - accuracy: 0.8708 - val_loss: 0.2841 - val_accuracy: 0.9195 - lr: 0.0010
Epoch 97/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4322 - accuracy: 0.8704
Epoch 97: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4325 - accuracy: 0.8704 - val_loss: 0.2870 - val_accuracy: 0.9194 - lr: 0.0010
Epoch 98/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4341 - accuracy: 0.8698
Epoch 98: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4342 - accuracy: 0.8698 - val_loss: 0.2820 - val_accuracy: 0.9182 - lr: 0.0010
Epoch 99/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4326 - accuracy: 0.8696
Epoch 99: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 15s 6ms/step - loss: 0.4326 - accuracy: 0.8696 - val_loss: 0.3074 - val_accuracy: 0.9121 - lr: 0.0010
Epoch 100/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4322 - accuracy: 0.8709
Epoch 100: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4321 - accuracy: 0.8709 - val_loss: 0.2952 - val_accuracy: 0.9202 - lr: 0.0010
Epoch 101/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4302 - accuracy: 0.8709
Epoch 101: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4301 - accuracy: 0.8710 - val_loss: 0.2838 - val_accuracy: 0.9198 - lr: 0.0010
Epoch 102/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4336 - accuracy: 0.8710
Epoch 102: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4337 - accuracy: 0.8709 - val_loss: 0.2995 - val_accuracy: 0.9186 - lr: 0.0010
Epoch 103/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4309 - accuracy: 0.8714
Epoch 103: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4308 - accuracy: 0.8714 - val_loss: 0.3033 - val_accuracy: 0.9144 - lr: 0.0010
Epoch 104/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4312 - accuracy: 0.8716
Epoch 104: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4310 - accuracy: 0.8717 - val_loss: 0.2971 - val_accuracy: 0.9148 - lr: 0.0010
Epoch 105/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4303 - accuracy: 0.8717
Epoch 105: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4304 - accuracy: 0.8717 - val_loss: 0.3023 - val_accuracy: 0.9122 - lr: 0.0010
Epoch 106/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4305 - accuracy: 0.8722
Epoch 106: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4306 - accuracy: 0.8722 - val_loss: 0.2906 - val_accuracy: 0.9187 - lr: 0.0010
Epoch 107/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4287 - accuracy: 0.8717
Epoch 107: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4288 - accuracy: 0.8717 - val_loss: 0.3040 - val_accuracy: 0.9122 - lr: 0.0010
Epoch 108/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4319 - accuracy: 0.8720
Epoch 108: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4318 - accuracy: 0.8720 - val_loss: 0.2979 - val_accuracy: 0.9172 - lr: 0.0010
Epoch 109/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4298 - accuracy: 0.8720
Epoch 109: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4297 - accuracy: 0.8720 - val_loss: 0.2879 - val_accuracy: 0.9202 - lr: 0.0010
Epoch 110/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4293 - accuracy: 0.8714
Epoch 110: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4294 - accuracy: 0.8714 - val_loss: 0.2884 - val_accuracy: 0.9182 - lr: 0.0010
Epoch 111/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4282 - accuracy: 0.8723
Epoch 111: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4281 - accuracy: 0.8723 - val_loss: 0.2970 - val_accuracy: 0.9182 - lr: 0.0010
Epoch 112/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4263 - accuracy: 0.8726
Epoch 112: val_accuracy did not improve from 0.92158
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4263 - accuracy: 0.8726 - val_loss: 0.2973 - val_accuracy: 0.9187 - lr: 0.0010
Epoch 113/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4309 - accuracy: 0.8714
Epoch 113: val_accuracy improved from 0.92158 to 0.92292, saving model to best_model.h5
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4309 - accuracy: 0.8714 - val_loss: 0.2806 - val_accuracy: 0.9229 - lr: 0.0010
Epoch 114/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4280 - accuracy: 0.8729
Epoch 114: val_accuracy did not improve from 0.92292
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4280 - accuracy: 0.8729 - val_loss: 0.2873 - val_accuracy: 0.9229 - lr: 0.0010
Epoch 115/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4289 - accuracy: 0.8724
Epoch 115: val_accuracy did not improve from 0.92292
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4289 - accuracy: 0.8724 - val_loss: 0.2843 - val_accuracy: 0.9220 - lr: 0.0010
Epoch 116/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4306 - accuracy: 0.8708
Epoch 116: val_accuracy did not improve from 0.92292
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4308 - accuracy: 0.8707 - val_loss: 0.3058 - val_accuracy: 0.9137 - lr: 0.0010
Epoch 117/200
2242/2250 [============================>.] - ETA: 0s - loss: 0.4274 - accuracy: 0.8726
Epoch 117: val_accuracy did not improve from 0.92292
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4273 - accuracy: 0.8726 - val_loss: 0.2907 - val_accuracy: 0.9192 - lr: 0.0010
Epoch 118/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4268 - accuracy: 0.8721
Epoch 118: val_accuracy did not improve from 0.92292
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4268 - accuracy: 0.8721 - val_loss: 0.2925 - val_accuracy: 0.9172 - lr: 0.0010
Epoch 119/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4283 - accuracy: 0.8725
Epoch 119: val_accuracy did not improve from 0.92292
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4282 - accuracy: 0.8725 - val_loss: 0.2881 - val_accuracy: 0.9197 - lr: 0.0010
Epoch 120/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4282 - accuracy: 0.8718
Epoch 120: val_accuracy did not improve from 0.92292
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4281 - accuracy: 0.8719 - val_loss: 0.2797 - val_accuracy: 0.9219 - lr: 0.0010
Epoch 121/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4298 - accuracy: 0.8718
Epoch 121: val_accuracy did not improve from 0.92292
2250/2250 [==============================] - 13s 6ms/step - loss: 0.4297 - accuracy: 0.8718 - val_loss: 0.2889 - val_accuracy: 0.9182 - lr: 0.0010
Epoch 122/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4256 - accuracy: 0.8732
Epoch 122: val_accuracy did not improve from 0.92292
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4256 - accuracy: 0.8732 - val_loss: 0.2830 - val_accuracy: 0.9223 - lr: 0.0010
Epoch 123/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4273 - accuracy: 0.8721
Epoch 123: val_accuracy did not improve from 0.92292
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4272 - accuracy: 0.8721 - val_loss: 0.2992 - val_accuracy: 0.9177 - lr: 0.0010
Epoch 124/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4290 - accuracy: 0.8724
Epoch 124: val_accuracy did not improve from 0.92292
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4290 - accuracy: 0.8723 - val_loss: 0.2866 - val_accuracy: 0.9187 - lr: 0.0010
Epoch 125/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4273 - accuracy: 0.8727
Epoch 125: val_accuracy did not improve from 0.92292
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4273 - accuracy: 0.8727 - val_loss: 0.2917 - val_accuracy: 0.9218 - lr: 0.0010
Epoch 126/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4252 - accuracy: 0.8735
Epoch 126: val_accuracy did not improve from 0.92292
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4252 - accuracy: 0.8735 - val_loss: 0.2833 - val_accuracy: 0.9189 - lr: 0.0010
Epoch 127/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4266 - accuracy: 0.8726
Epoch 127: val_accuracy did not improve from 0.92292
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4265 - accuracy: 0.8726 - val_loss: 0.2962 - val_accuracy: 0.9203 - lr: 0.0010
Epoch 128/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4258 - accuracy: 0.8733
Epoch 128: val_accuracy did not improve from 0.92292
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4259 - accuracy: 0.8733 - val_loss: 0.2926 - val_accuracy: 0.9168 - lr: 0.0010
Epoch 129/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4237 - accuracy: 0.8736
Epoch 129: val_accuracy did not improve from 0.92292
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4238 - accuracy: 0.8736 - val_loss: 0.2783 - val_accuracy: 0.9228 - lr: 0.0010
Epoch 130/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4245 - accuracy: 0.8733
Epoch 130: val_accuracy did not improve from 0.92292
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4245 - accuracy: 0.8733 - val_loss: 0.2903 - val_accuracy: 0.9229 - lr: 0.0010
Epoch 131/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4239 - accuracy: 0.8739
Epoch 131: val_accuracy did not improve from 0.92292
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4239 - accuracy: 0.8739 - val_loss: 0.2885 - val_accuracy: 0.9192 - lr: 0.0010
Epoch 132/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4259 - accuracy: 0.8733
Epoch 132: val_accuracy did not improve from 0.92292
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4258 - accuracy: 0.8733 - val_loss: 0.2961 - val_accuracy: 0.9194 - lr: 0.0010
Epoch 133/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4232 - accuracy: 0.8735
Epoch 133: val_accuracy did not improve from 0.92292
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4232 - accuracy: 0.8735 - val_loss: 0.2833 - val_accuracy: 0.9229 - lr: 0.0010
Epoch 134/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4233 - accuracy: 0.8733
Epoch 134: val_accuracy did not improve from 0.92292
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4233 - accuracy: 0.8733 - val_loss: 0.2810 - val_accuracy: 0.9227 - lr: 0.0010
Epoch 135/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4228 - accuracy: 0.8740
Epoch 135: val_accuracy did not improve from 0.92292
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4228 - accuracy: 0.8740 - val_loss: 0.2913 - val_accuracy: 0.9189 - lr: 0.0010
Epoch 136/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4258 - accuracy: 0.8732
Epoch 136: val_accuracy did not improve from 0.92292
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4259 - accuracy: 0.8732 - val_loss: 0.3063 - val_accuracy: 0.9134 - lr: 0.0010
Epoch 137/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4233 - accuracy: 0.8743
Epoch 137: val_accuracy did not improve from 0.92292
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4232 - accuracy: 0.8743 - val_loss: 0.2781 - val_accuracy: 0.9224 - lr: 0.0010
Epoch 138/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4251 - accuracy: 0.8729
Epoch 138: val_accuracy did not improve from 0.92292
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4251 - accuracy: 0.8729 - val_loss: 0.2888 - val_accuracy: 0.9214 - lr: 0.0010
Epoch 139/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4246 - accuracy: 0.8726
Epoch 139: val_accuracy did not improve from 0.92292
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4244 - accuracy: 0.8727 - val_loss: 0.2953 - val_accuracy: 0.9187 - lr: 0.0010
Epoch 140/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4225 - accuracy: 0.8750
Epoch 140: val_accuracy did not improve from 0.92292
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4224 - accuracy: 0.8750 - val_loss: 0.3264 - val_accuracy: 0.9120 - lr: 0.0010
Epoch 141/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4211 - accuracy: 0.8750
Epoch 141: val_accuracy did not improve from 0.92292
2250/2250 [==============================] - 15s 6ms/step - loss: 0.4210 - accuracy: 0.8751 - val_loss: 0.2871 - val_accuracy: 0.9229 - lr: 0.0010
Epoch 142/200
2242/2250 [============================>.] - ETA: 0s - loss: 0.4217 - accuracy: 0.8740
Epoch 142: val_accuracy did not improve from 0.92292
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4217 - accuracy: 0.8740 - val_loss: 0.2807 - val_accuracy: 0.9202 - lr: 0.0010
Epoch 143/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4216 - accuracy: 0.8740
Epoch 143: val_accuracy improved from 0.92292 to 0.92567, saving model to best_model.h5
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4218 - accuracy: 0.8740 - val_loss: 0.2776 - val_accuracy: 0.9257 - lr: 0.0010
Epoch 144/200
2242/2250 [============================>.] - ETA: 0s - loss: 0.4224 - accuracy: 0.8739
Epoch 144: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4222 - accuracy: 0.8741 - val_loss: 0.2787 - val_accuracy: 0.9213 - lr: 0.0010
Epoch 145/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4219 - accuracy: 0.8736
Epoch 145: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4219 - accuracy: 0.8736 - val_loss: 0.2865 - val_accuracy: 0.9217 - lr: 0.0010
Epoch 146/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4241 - accuracy: 0.8737
Epoch 146: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4241 - accuracy: 0.8737 - val_loss: 0.2870 - val_accuracy: 0.9212 - lr: 0.0010
Epoch 147/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4217 - accuracy: 0.8742
Epoch 147: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4220 - accuracy: 0.8742 - val_loss: 0.2966 - val_accuracy: 0.9187 - lr: 0.0010
Epoch 148/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4203 - accuracy: 0.8744
Epoch 148: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4203 - accuracy: 0.8744 - val_loss: 0.2893 - val_accuracy: 0.9197 - lr: 0.0010
Epoch 149/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4222 - accuracy: 0.8751
Epoch 149: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4222 - accuracy: 0.8750 - val_loss: 0.2815 - val_accuracy: 0.9207 - lr: 0.0010
Epoch 150/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4232 - accuracy: 0.8744
Epoch 150: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4232 - accuracy: 0.8744 - val_loss: 0.2828 - val_accuracy: 0.9209 - lr: 0.0010
Epoch 151/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4241 - accuracy: 0.8750
Epoch 151: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4240 - accuracy: 0.8750 - val_loss: 0.2969 - val_accuracy: 0.9187 - lr: 0.0010
Epoch 152/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4218 - accuracy: 0.8748
Epoch 152: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4216 - accuracy: 0.8748 - val_loss: 0.2879 - val_accuracy: 0.9183 - lr: 0.0010
Epoch 153/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4226 - accuracy: 0.8754
Epoch 153: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4226 - accuracy: 0.8754 - val_loss: 0.2848 - val_accuracy: 0.9209 - lr: 0.0010
Epoch 154/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4222 - accuracy: 0.8750
Epoch 154: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4221 - accuracy: 0.8749 - val_loss: 0.2887 - val_accuracy: 0.9179 - lr: 0.0010
Epoch 155/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4208 - accuracy: 0.8747
Epoch 155: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4208 - accuracy: 0.8747 - val_loss: 0.2917 - val_accuracy: 0.9186 - lr: 0.0010
Epoch 156/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4208 - accuracy: 0.8753
Epoch 156: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4208 - accuracy: 0.8753 - val_loss: 0.2815 - val_accuracy: 0.9226 - lr: 0.0010
Epoch 157/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4217 - accuracy: 0.8742
Epoch 157: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4218 - accuracy: 0.8741 - val_loss: 0.2970 - val_accuracy: 0.9177 - lr: 0.0010
Epoch 158/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4199 - accuracy: 0.8751
Epoch 158: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4199 - accuracy: 0.8751 - val_loss: 0.2883 - val_accuracy: 0.9208 - lr: 0.0010
Epoch 159/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4211 - accuracy: 0.8747
Epoch 159: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4213 - accuracy: 0.8747 - val_loss: 0.2830 - val_accuracy: 0.9198 - lr: 0.0010
Epoch 160/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4212 - accuracy: 0.8741
Epoch 160: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4212 - accuracy: 0.8741 - val_loss: 0.2882 - val_accuracy: 0.9208 - lr: 0.0010
Epoch 161/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4227 - accuracy: 0.8742
Epoch 161: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4227 - accuracy: 0.8742 - val_loss: 0.2807 - val_accuracy: 0.9232 - lr: 0.0010
Epoch 162/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4197 - accuracy: 0.8756
Epoch 162: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4196 - accuracy: 0.8756 - val_loss: 0.2772 - val_accuracy: 0.9225 - lr: 0.0010
Epoch 163/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4191 - accuracy: 0.8746
Epoch 163: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 17s 7ms/step - loss: 0.4192 - accuracy: 0.8746 - val_loss: 0.2934 - val_accuracy: 0.9187 - lr: 0.0010
Epoch 164/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4189 - accuracy: 0.8755
Epoch 164: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 17s 7ms/step - loss: 0.4189 - accuracy: 0.8755 - val_loss: 0.2784 - val_accuracy: 0.9216 - lr: 0.0010
Epoch 165/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4187 - accuracy: 0.8759
Epoch 165: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4187 - accuracy: 0.8759 - val_loss: 0.2796 - val_accuracy: 0.9250 - lr: 0.0010
Epoch 166/200
2242/2250 [============================>.] - ETA: 0s - loss: 0.4209 - accuracy: 0.8755
Epoch 166: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4207 - accuracy: 0.8756 - val_loss: 0.2826 - val_accuracy: 0.9197 - lr: 0.0010
Epoch 167/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4190 - accuracy: 0.8754
Epoch 167: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4190 - accuracy: 0.8754 - val_loss: 0.2902 - val_accuracy: 0.9222 - lr: 0.0010
Epoch 168/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4216 - accuracy: 0.8752
Epoch 168: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4216 - accuracy: 0.8752 - val_loss: 0.2837 - val_accuracy: 0.9244 - lr: 0.0010
Epoch 169/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4186 - accuracy: 0.8752
Epoch 169: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4188 - accuracy: 0.8751 - val_loss: 0.2910 - val_accuracy: 0.9196 - lr: 0.0010
Epoch 170/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4193 - accuracy: 0.8745
Epoch 170: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4193 - accuracy: 0.8745 - val_loss: 0.2915 - val_accuracy: 0.9215 - lr: 0.0010
Epoch 171/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4214 - accuracy: 0.8749
Epoch 171: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4214 - accuracy: 0.8749 - val_loss: 0.2814 - val_accuracy: 0.9237 - lr: 0.0010
Epoch 172/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4170 - accuracy: 0.8759
Epoch 172: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4170 - accuracy: 0.8759 - val_loss: 0.2970 - val_accuracy: 0.9169 - lr: 0.0010
Epoch 173/200
2242/2250 [============================>.] - ETA: 0s - loss: 0.4188 - accuracy: 0.8757
Epoch 173: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4190 - accuracy: 0.8757 - val_loss: 0.2988 - val_accuracy: 0.9167 - lr: 0.0010
Epoch 174/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4162 - accuracy: 0.8771
Epoch 174: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4162 - accuracy: 0.8770 - val_loss: 0.2828 - val_accuracy: 0.9202 - lr: 0.0010
Epoch 175/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4158 - accuracy: 0.8761
Epoch 175: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4158 - accuracy: 0.8761 - val_loss: 0.2866 - val_accuracy: 0.9209 - lr: 0.0010
Epoch 176/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4165 - accuracy: 0.8763
Epoch 176: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4165 - accuracy: 0.8763 - val_loss: 0.2830 - val_accuracy: 0.9212 - lr: 0.0010
Epoch 177/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4182 - accuracy: 0.8760
Epoch 177: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4181 - accuracy: 0.8760 - val_loss: 0.2942 - val_accuracy: 0.9193 - lr: 0.0010
Epoch 178/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4171 - accuracy: 0.8765
Epoch 178: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4171 - accuracy: 0.8765 - val_loss: 0.2757 - val_accuracy: 0.9245 - lr: 0.0010
Epoch 179/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4153 - accuracy: 0.8766
Epoch 179: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4154 - accuracy: 0.8766 - val_loss: 0.2915 - val_accuracy: 0.9212 - lr: 0.0010
Epoch 180/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4176 - accuracy: 0.8764
Epoch 180: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 15s 6ms/step - loss: 0.4176 - accuracy: 0.8764 - val_loss: 0.2903 - val_accuracy: 0.9214 - lr: 0.0010
Epoch 181/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4183 - accuracy: 0.8760
Epoch 181: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4185 - accuracy: 0.8760 - val_loss: 0.2825 - val_accuracy: 0.9208 - lr: 0.0010
Epoch 182/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4195 - accuracy: 0.8746
Epoch 182: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 15s 6ms/step - loss: 0.4196 - accuracy: 0.8746 - val_loss: 0.2903 - val_accuracy: 0.9194 - lr: 0.0010
Epoch 183/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4180 - accuracy: 0.8751
Epoch 183: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4180 - accuracy: 0.8751 - val_loss: 0.3001 - val_accuracy: 0.9152 - lr: 0.0010
Epoch 184/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4189 - accuracy: 0.8757
Epoch 184: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4188 - accuracy: 0.8757 - val_loss: 0.2820 - val_accuracy: 0.9230 - lr: 0.0010
Epoch 185/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4196 - accuracy: 0.8757
Epoch 185: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4195 - accuracy: 0.8758 - val_loss: 0.2956 - val_accuracy: 0.9177 - lr: 0.0010
Epoch 186/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4172 - accuracy: 0.8762
Epoch 186: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4172 - accuracy: 0.8762 - val_loss: 0.2935 - val_accuracy: 0.9205 - lr: 0.0010
Epoch 187/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4173 - accuracy: 0.8764
Epoch 187: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 15s 6ms/step - loss: 0.4173 - accuracy: 0.8765 - val_loss: 0.2876 - val_accuracy: 0.9218 - lr: 0.0010
Epoch 188/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4150 - accuracy: 0.8767
Epoch 188: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4151 - accuracy: 0.8767 - val_loss: 0.2991 - val_accuracy: 0.9176 - lr: 0.0010
Epoch 189/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4168 - accuracy: 0.8759
Epoch 189: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4168 - accuracy: 0.8759 - val_loss: 0.2905 - val_accuracy: 0.9215 - lr: 0.0010
Epoch 190/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4146 - accuracy: 0.8769
Epoch 190: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4146 - accuracy: 0.8769 - val_loss: 0.2832 - val_accuracy: 0.9241 - lr: 0.0010
Epoch 191/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4167 - accuracy: 0.8762
Epoch 191: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4167 - accuracy: 0.8762 - val_loss: 0.2781 - val_accuracy: 0.9247 - lr: 0.0010
Epoch 192/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4179 - accuracy: 0.8755
Epoch 192: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 15s 6ms/step - loss: 0.4179 - accuracy: 0.8755 - val_loss: 0.2825 - val_accuracy: 0.9209 - lr: 0.0010
Epoch 193/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4179 - accuracy: 0.8762
Epoch 193: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4179 - accuracy: 0.8762 - val_loss: 0.2814 - val_accuracy: 0.9227 - lr: 0.0010
Epoch 194/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4143 - accuracy: 0.8773
Epoch 194: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4143 - accuracy: 0.8773 - val_loss: 0.2917 - val_accuracy: 0.9186 - lr: 0.0010
Epoch 195/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4155 - accuracy: 0.8761
Epoch 195: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4156 - accuracy: 0.8761 - val_loss: 0.2990 - val_accuracy: 0.9187 - lr: 0.0010
Epoch 196/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4178 - accuracy: 0.8764
Epoch 196: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4177 - accuracy: 0.8765 - val_loss: 0.2830 - val_accuracy: 0.9227 - lr: 0.0010
Epoch 197/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4147 - accuracy: 0.8765
Epoch 197: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4147 - accuracy: 0.8765 - val_loss: 0.2830 - val_accuracy: 0.9219 - lr: 0.0010
Epoch 198/200
2242/2250 [============================>.] - ETA: 0s - loss: 0.4169 - accuracy: 0.8750
Epoch 198: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4168 - accuracy: 0.8750 - val_loss: 0.2845 - val_accuracy: 0.9208 - lr: 0.0010
Epoch 198: early stopping
Train: 0.924, Test: 0.920 313/313 [==============================] - 1s 2ms/step
import visualkeras
visualkeras.layered_view(saved_model)
pred = saved_model.predict(X_test)
pred = np.argmax(pred, axis=1)
classifation_matrix = confusion_matrix(y_test_label, pred)
# plot confusion matrix
plt.figure(figsize=(10,10))
sns.heatmap(classifation_matrix, annot=True, fmt="d")
plt.title("Confusion matrix")
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
313/313 [==============================] - 1s 2ms/step
from keras.callbacks import ModelCheckpoint
from keras.models import load_model
final_model2 = Sequential()
final_model2.add(Conv2D(128, kernel_size=(3, 3),activation='linear',kernel_initializer='he_normal',input_shape=(28,28,1)))
final_model2.add(LeakyReLU(alpha=0.1))
final_model2.add(MaxPooling2D((2, 2)))
final_model2.add(BatchNormalization())
final_model2.add(Dropout(0.25))
final_model2.add(Conv2D(64, (5, 5), activation='linear'))
final_model2.add(LeakyReLU(alpha=0.1))
final_model2.add(BatchNormalization())
final_model2.add(MaxPooling2D(pool_size=(3, 3)))
final_model2.add(Dropout(0.25))
final_model2.add(Conv2D(32, (3, 3), activation='linear'))
final_model2.add(LeakyReLU(alpha=0.1))
final_model2.add(BatchNormalization())
final_model2.add(Dropout(0.4))
final_model2.add(Flatten())
final_model2.add(Dense(128, activation='linear'))
final_model2.add(LeakyReLU(alpha=0.1))
final_model2.add(Dropout(0.3))
final_model2.add(Dense(64, activation='linear'))
final_model2.add(LeakyReLU(alpha=0.1))
final_model2.add(Dense(10, activation='softmax', kernel_regularizer=tensorflow.keras.regularizers.L1(0.01)))
final_model.compile(optimizer='adam',
loss=tf.keras.losses.categorical_crossentropy,
metrics=['accuracy'])
final_model.summary()
early_stopping = EarlyStopping(monitor='val_loss', patience=20, verbose=1)
mc = ModelCheckpoint('final_fanout_model.h5', monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
patience=5, min_lr=0.001)
h_callback = final_model.fit(X_train, y_train, epochs = 200,
validation_data=(X_val, y_val), callbacks=[early_stopping,mc,reduce_lr] , batch_size=64)
# Plot train vs test loss during training
plot_loss(h_callback.history['loss'], h_callback.history['val_loss'])
plot_accuracy(h_callback.history['accuracy'], h_callback.history['val_accuracy'])
fanout_model = load_model('final_fanout_model.h5')
# evaluate the model
_, train_acc = fanout_model.evaluate(X_train, y_train, verbose=0)
_, test_acc = fanout_model.evaluate(X_test, y_test, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))
precision,recall,f1 = get_metrics(fanout_model,X_test)
Model_scores = pd.concat([Model_scores,pd.DataFrame([['Fan out final model',test_acc,test_loss,precision,recall,f1]],columns=Model_scores.columns)],ignore_index=True)
visualkeras.layered_view(fanout_model)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 26, 26, 32) 320
leaky_re_lu (LeakyReLU) (None, 26, 26, 32) 0
max_pooling2d (MaxPooling2D (None, 13, 13, 32) 0
)
batch_normalization (BatchN (None, 13, 13, 32) 128
ormalization)
dropout (Dropout) (None, 13, 13, 32) 0
conv2d_1 (Conv2D) (None, 9, 9, 64) 51264
leaky_re_lu_1 (LeakyReLU) (None, 9, 9, 64) 0
batch_normalization_1 (Batc (None, 9, 9, 64) 256
hNormalization)
max_pooling2d_1 (MaxPooling (None, 3, 3, 64) 0
2D)
dropout_1 (Dropout) (None, 3, 3, 64) 0
conv2d_2 (Conv2D) (None, 1, 1, 128) 73856
leaky_re_lu_2 (LeakyReLU) (None, 1, 1, 128) 0
batch_normalization_2 (Batc (None, 1, 1, 128) 512
hNormalization)
dropout_2 (Dropout) (None, 1, 1, 128) 0
flatten (Flatten) (None, 128) 0
dense (Dense) (None, 128) 16512
leaky_re_lu_3 (LeakyReLU) (None, 128) 0
dropout_3 (Dropout) (None, 128) 0
flatten_1 (Flatten) (None, 128) 0
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 144,138
Trainable params: 143,690
Non-trainable params: 448
_________________________________________________________________
Epoch 1/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4158 - accuracy: 0.8773
Epoch 1: val_accuracy improved from -inf to 0.92067, saving model to final_fanout_model.h5
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4160 - accuracy: 0.8773 - val_loss: 0.2843 - val_accuracy: 0.9207 - lr: 0.0010
Epoch 2/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4157 - accuracy: 0.8764
Epoch 2: val_accuracy improved from 0.92067 to 0.92208, saving model to final_fanout_model.h5
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4157 - accuracy: 0.8764 - val_loss: 0.2816 - val_accuracy: 0.9221 - lr: 0.0010
Epoch 3/200
2242/2250 [============================>.] - ETA: 0s - loss: 0.4142 - accuracy: 0.8777
Epoch 3: val_accuracy improved from 0.92208 to 0.92367, saving model to final_fanout_model.h5
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4141 - accuracy: 0.8778 - val_loss: 0.2806 - val_accuracy: 0.9237 - lr: 0.0010
Epoch 4/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4154 - accuracy: 0.8767
Epoch 4: val_accuracy did not improve from 0.92367
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4153 - accuracy: 0.8767 - val_loss: 0.3101 - val_accuracy: 0.9133 - lr: 0.0010
Epoch 5/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4164 - accuracy: 0.8762
Epoch 5: val_accuracy did not improve from 0.92367
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4164 - accuracy: 0.8762 - val_loss: 0.2981 - val_accuracy: 0.9203 - lr: 0.0010
Epoch 6/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4169 - accuracy: 0.8769
Epoch 6: val_accuracy did not improve from 0.92367
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4168 - accuracy: 0.8769 - val_loss: 0.2878 - val_accuracy: 0.9222 - lr: 0.0010
Epoch 7/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4151 - accuracy: 0.8771
Epoch 7: val_accuracy did not improve from 0.92367
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4151 - accuracy: 0.8771 - val_loss: 0.2843 - val_accuracy: 0.9222 - lr: 0.0010
Epoch 8/200
2242/2250 [============================>.] - ETA: 0s - loss: 0.4130 - accuracy: 0.8778
Epoch 8: val_accuracy did not improve from 0.92367
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4130 - accuracy: 0.8778 - val_loss: 0.2943 - val_accuracy: 0.9195 - lr: 0.0010
Epoch 9/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4155 - accuracy: 0.8779
Epoch 9: val_accuracy did not improve from 0.92367
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4155 - accuracy: 0.8779 - val_loss: 0.2845 - val_accuracy: 0.9218 - lr: 0.0010
Epoch 10/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4155 - accuracy: 0.8770
Epoch 10: val_accuracy did not improve from 0.92367
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4154 - accuracy: 0.8770 - val_loss: 0.2789 - val_accuracy: 0.9224 - lr: 0.0010
Epoch 11/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4141 - accuracy: 0.8771
Epoch 11: val_accuracy did not improve from 0.92367
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4141 - accuracy: 0.8771 - val_loss: 0.2807 - val_accuracy: 0.9219 - lr: 0.0010
Epoch 12/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4148 - accuracy: 0.8772
Epoch 12: val_accuracy did not improve from 0.92367
2250/2250 [==============================] - 15s 6ms/step - loss: 0.4147 - accuracy: 0.8773 - val_loss: 0.2889 - val_accuracy: 0.9219 - lr: 0.0010
Epoch 13/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4157 - accuracy: 0.8763
Epoch 13: val_accuracy did not improve from 0.92367
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4156 - accuracy: 0.8763 - val_loss: 0.2998 - val_accuracy: 0.9210 - lr: 0.0010
Epoch 14/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4146 - accuracy: 0.8768
Epoch 14: val_accuracy improved from 0.92367 to 0.92383, saving model to final_fanout_model.h5
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4147 - accuracy: 0.8768 - val_loss: 0.2858 - val_accuracy: 0.9238 - lr: 0.0010
Epoch 15/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4139 - accuracy: 0.8780
Epoch 15: val_accuracy did not improve from 0.92383
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4139 - accuracy: 0.8780 - val_loss: 0.2787 - val_accuracy: 0.9230 - lr: 0.0010
Epoch 16/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4131 - accuracy: 0.8775
Epoch 16: val_accuracy did not improve from 0.92383
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4131 - accuracy: 0.8775 - val_loss: 0.3020 - val_accuracy: 0.9168 - lr: 0.0010
Epoch 17/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4130 - accuracy: 0.8768
Epoch 17: val_accuracy did not improve from 0.92383
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4131 - accuracy: 0.8768 - val_loss: 0.3085 - val_accuracy: 0.9166 - lr: 0.0010
Epoch 18/200
2242/2250 [============================>.] - ETA: 0s - loss: 0.4133 - accuracy: 0.8771
Epoch 18: val_accuracy did not improve from 0.92383
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4133 - accuracy: 0.8771 - val_loss: 0.2963 - val_accuracy: 0.9187 - lr: 0.0010
Epoch 19/200
2242/2250 [============================>.] - ETA: 0s - loss: 0.4165 - accuracy: 0.8766
Epoch 19: val_accuracy improved from 0.92383 to 0.92567, saving model to final_fanout_model.h5
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4169 - accuracy: 0.8764 - val_loss: 0.2733 - val_accuracy: 0.9257 - lr: 0.0010
Epoch 20/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4163 - accuracy: 0.8764
Epoch 20: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4165 - accuracy: 0.8763 - val_loss: 0.2915 - val_accuracy: 0.9183 - lr: 0.0010
Epoch 21/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4128 - accuracy: 0.8779
Epoch 21: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4129 - accuracy: 0.8779 - val_loss: 0.2740 - val_accuracy: 0.9252 - lr: 0.0010
Epoch 22/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4145 - accuracy: 0.8771
Epoch 22: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4144 - accuracy: 0.8772 - val_loss: 0.2869 - val_accuracy: 0.9204 - lr: 0.0010
Epoch 23/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4133 - accuracy: 0.8773
Epoch 23: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4134 - accuracy: 0.8772 - val_loss: 0.2805 - val_accuracy: 0.9235 - lr: 0.0010
Epoch 24/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4131 - accuracy: 0.8769
Epoch 24: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4130 - accuracy: 0.8769 - val_loss: 0.2950 - val_accuracy: 0.9205 - lr: 0.0010
Epoch 25/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4120 - accuracy: 0.8782
Epoch 25: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4120 - accuracy: 0.8782 - val_loss: 0.2842 - val_accuracy: 0.9237 - lr: 0.0010
Epoch 26/200
2242/2250 [============================>.] - ETA: 0s - loss: 0.4143 - accuracy: 0.8780
Epoch 26: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4147 - accuracy: 0.8778 - val_loss: 0.2889 - val_accuracy: 0.9199 - lr: 0.0010
Epoch 27/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4116 - accuracy: 0.8781
Epoch 27: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4115 - accuracy: 0.8780 - val_loss: 0.2818 - val_accuracy: 0.9237 - lr: 0.0010
Epoch 28/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4136 - accuracy: 0.8767
Epoch 28: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4135 - accuracy: 0.8767 - val_loss: 0.2900 - val_accuracy: 0.9208 - lr: 0.0010
Epoch 29/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4141 - accuracy: 0.8772
Epoch 29: val_accuracy did not improve from 0.92567
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4143 - accuracy: 0.8771 - val_loss: 0.2864 - val_accuracy: 0.9196 - lr: 0.0010
Epoch 30/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4127 - accuracy: 0.8776
Epoch 30: val_accuracy improved from 0.92567 to 0.92600, saving model to final_fanout_model.h5
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4127 - accuracy: 0.8776 - val_loss: 0.2732 - val_accuracy: 0.9260 - lr: 0.0010
Epoch 31/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4129 - accuracy: 0.8770
Epoch 31: val_accuracy did not improve from 0.92600
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4129 - accuracy: 0.8770 - val_loss: 0.2859 - val_accuracy: 0.9230 - lr: 0.0010
Epoch 32/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4134 - accuracy: 0.8771
Epoch 32: val_accuracy did not improve from 0.92600
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4134 - accuracy: 0.8771 - val_loss: 0.2866 - val_accuracy: 0.9224 - lr: 0.0010
Epoch 33/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4120 - accuracy: 0.8780
Epoch 33: val_accuracy did not improve from 0.92600
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4118 - accuracy: 0.8780 - val_loss: 0.2893 - val_accuracy: 0.9199 - lr: 0.0010
Epoch 34/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4131 - accuracy: 0.8772
Epoch 34: val_accuracy did not improve from 0.92600
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4132 - accuracy: 0.8772 - val_loss: 0.2885 - val_accuracy: 0.9212 - lr: 0.0010
Epoch 35/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4138 - accuracy: 0.8782
Epoch 35: val_accuracy improved from 0.92600 to 0.92767, saving model to final_fanout_model.h5
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4137 - accuracy: 0.8782 - val_loss: 0.2770 - val_accuracy: 0.9277 - lr: 0.0010
Epoch 36/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4126 - accuracy: 0.8772
Epoch 36: val_accuracy did not improve from 0.92767
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4126 - accuracy: 0.8772 - val_loss: 0.2933 - val_accuracy: 0.9211 - lr: 0.0010
Epoch 37/200
2249/2250 [============================>.] - ETA: 0s - loss: 0.4110 - accuracy: 0.8788
Epoch 37: val_accuracy did not improve from 0.92767
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4110 - accuracy: 0.8788 - val_loss: 0.2814 - val_accuracy: 0.9203 - lr: 0.0010
Epoch 38/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4140 - accuracy: 0.8784
Epoch 38: val_accuracy did not improve from 0.92767
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4140 - accuracy: 0.8784 - val_loss: 0.2985 - val_accuracy: 0.9206 - lr: 0.0010
Epoch 39/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4109 - accuracy: 0.8783
Epoch 39: val_accuracy did not improve from 0.92767
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4108 - accuracy: 0.8784 - val_loss: 0.2903 - val_accuracy: 0.9214 - lr: 0.0010
Epoch 40/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4133 - accuracy: 0.8781
Epoch 40: val_accuracy did not improve from 0.92767
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4133 - accuracy: 0.8781 - val_loss: 0.2848 - val_accuracy: 0.9219 - lr: 0.0010
Epoch 41/200
2248/2250 [============================>.] - ETA: 0s - loss: 0.4125 - accuracy: 0.8783
Epoch 41: val_accuracy did not improve from 0.92767
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4125 - accuracy: 0.8783 - val_loss: 0.2772 - val_accuracy: 0.9244 - lr: 0.0010
Epoch 42/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4135 - accuracy: 0.8784
Epoch 42: val_accuracy did not improve from 0.92767
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4135 - accuracy: 0.8784 - val_loss: 0.2869 - val_accuracy: 0.9220 - lr: 0.0010
Epoch 43/200
2247/2250 [============================>.] - ETA: 0s - loss: 0.4128 - accuracy: 0.8780
Epoch 43: val_accuracy did not improve from 0.92767
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4129 - accuracy: 0.8780 - val_loss: 0.2915 - val_accuracy: 0.9246 - lr: 0.0010
Epoch 44/200
2242/2250 [============================>.] - ETA: 0s - loss: 0.4123 - accuracy: 0.8782
Epoch 44: val_accuracy did not improve from 0.92767
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4121 - accuracy: 0.8783 - val_loss: 0.2947 - val_accuracy: 0.9152 - lr: 0.0010
Epoch 45/200
2245/2250 [============================>.] - ETA: 0s - loss: 0.4123 - accuracy: 0.8782
Epoch 45: val_accuracy did not improve from 0.92767
2250/2250 [==============================] - 14s 6ms/step - loss: 0.4125 - accuracy: 0.8781 - val_loss: 0.2911 - val_accuracy: 0.9199 - lr: 0.0010
Epoch 46/200
2250/2250 [==============================] - ETA: 0s - loss: 0.4119 - accuracy: 0.8783
Epoch 46: val_accuracy did not improve from 0.92767
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4119 - accuracy: 0.8783 - val_loss: 0.2806 - val_accuracy: 0.9222 - lr: 0.0010
Epoch 47/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4116 - accuracy: 0.8789
Epoch 47: val_accuracy did not improve from 0.92767
2250/2250 [==============================] - 15s 7ms/step - loss: 0.4116 - accuracy: 0.8790 - val_loss: 0.2858 - val_accuracy: 0.9217 - lr: 0.0010
Epoch 48/200
2246/2250 [============================>.] - ETA: 0s - loss: 0.4137 - accuracy: 0.8776
Epoch 48: val_accuracy did not improve from 0.92767
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4137 - accuracy: 0.8776 - val_loss: 0.2788 - val_accuracy: 0.9216 - lr: 0.0010
Epoch 49/200
2244/2250 [============================>.] - ETA: 0s - loss: 0.4113 - accuracy: 0.8782
Epoch 49: val_accuracy did not improve from 0.92767
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4114 - accuracy: 0.8782 - val_loss: 0.2899 - val_accuracy: 0.9218 - lr: 0.0010
Epoch 50/200
2243/2250 [============================>.] - ETA: 0s - loss: 0.4119 - accuracy: 0.8778
Epoch 50: val_accuracy did not improve from 0.92767
2250/2250 [==============================] - 16s 7ms/step - loss: 0.4118 - accuracy: 0.8778 - val_loss: 0.2863 - val_accuracy: 0.9232 - lr: 0.0010
Epoch 50: early stopping
Train: 0.927, Test: 0.920 313/313 [==============================] - 1s 2ms/step
# plot actual and predicted images for 10 random images that were misclassified
pred = fanout_model.predict(X_test)
# get images where predictions and actual labels don't match
incorrect = np.nonzero(pred.argmax(axis=1) != y_test_label)[0]
# select 10 random images from those where prediction and actual label don't match
incorrect = np.random.choice(incorrect, 10)
# plot 10 random images where prediction and actual label don't match
types = [ 'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
fig, ax = plt.subplots(2, 5, figsize=(15, 6))
for i, incorrect in enumerate(incorrect):
ax[i//5, i%5].imshow(X_test[incorrect].reshape(28, 28), cmap='gray', interpolation='none')
# title for each image will be actual and predicted label
ax[i//5, i%5].set_title("Predicted {}, Class {}".format(types[pred[incorrect].argmax()], types[y_test_label[incorrect]]))
ax[i//5, i%5].axis('off')
313/313 [==============================] - 1s 2ms/step
pred = fanout_model.predict(X_test)
pred = np.argmax(pred, axis=1)
classifation_matrix = confusion_matrix(y_test_label, pred)
# plot confusion matrix
plt.figure(figsize=(10,10))
sns.heatmap(classifation_matrix, annot=True, fmt="d")
plt.title("Confusion matrix")
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
313/313 [==============================] - 1s 2ms/step
Model_scores
| Model | Accuracy | Loss | Precision | Recall | F1 Score | |
|---|---|---|---|---|---|---|
| 0 | Dense 1 layer NN | 0.8794 | 0.366588 | 0.883387 | 0.8794 | 0.879616 |
| 1 | Dense NN 2 layer | 0.8748 | 0.347432 | 0.874826 | 0.8748 | 0.873875 |
| 2 | Dense NN 3 layer | 0.8755 | 0.346159 | 0.875871 | 0.8755 | 0.874770 |
| 3 | CNN linear activation | 0.8195 | 0.499430 | 0.829953 | 0.8195 | 0.821822 |
| 4 | CNN linear max pool | 0.8778 | 0.700755 | 0.879536 | 0.8778 | 0.878137 |
| 5 | CNN linear avg pool | 0.8325 | 0.494453 | 0.831806 | 0.8325 | 0.829783 |
| 6 | CNN linear w batch_norm | 0.8210 | 0.516410 | 0.823537 | 0.8210 | 0.818529 |
| 7 | linear batchnorm high epochs w rotated data aug | 0.8301 | 0.491786 | 0.829276 | 0.8301 | 0.828320 |
| 8 | CNN relu adam w dropout | 0.8721 | 1.109318 | 0.872658 | 0.8721 | 0.872179 |
| 9 | Machine Learning Mastery model | 0.8721 | 1.109318 | 0.897904 | 0.8943 | 0.894330 |
| 10 | Fan in final model | 0.9203 | 1.109318 | 0.919982 | 0.9203 | 0.920094 |
| 11 | Fan out final model | 0.9205 | 1.109318 | 0.920426 | 0.9205 | 0.920308 |
Overall , I think it was a good learning experience as I managed to improve and beat a model created by Machine Learning Master. I hope to learn more about CNNs and other deep learning models in the future